powo_ruby 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,86 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../errors"
4
+
5
module PowoRuby
  module RequestSupport
    # Encapsulates retry behavior for HTTP calls.
    #
    # The block given to {#with_retry} is re-run when it raises:
    # - {PowoRuby::RateLimitedError} (HTTP 429)
    # - {PowoRuby::ServerError} (HTTP 5xx)
    #
    # Retries use exponential backoff with a small jitter and respect a numeric
    # `Retry-After` header when the rate-limit error carries one.
    class RetryPolicy
      RETRY_AFTER_HEADER = "retry-after"
      private_constant :RETRY_AFTER_HEADER

      # @param enabled [Boolean] enable retry behavior
      # @param max_retries [Integer] number of retries (not counting the first attempt)
      # @param backoff_base [Numeric] base seconds for backoff
      # @param backoff_max [Numeric] max seconds for backoff
      # @param logger [#warn, nil] optional logger
      # @raise [ArgumentError, TypeError] when a numeric option cannot be coerced
      def initialize(enabled:, max_retries:, backoff_base:, backoff_max:, logger: nil)
        @enabled = enabled ? true : false
        @max_retries = Integer(max_retries)
        @backoff_base = Float(backoff_base)
        @backoff_max = Float(backoff_max)
        @logger = logger
      end

      # Execute a block with retry logic.
      #
      # The original error is re-raised when retries are disabled or when the
      # attempt budget (`max_retries + 1`) is exhausted.
      #
      # @param method [Symbol, String] HTTP method, used only for the log line
      # @param url [String] request URL, used only for the log line
      # @yieldreturn [Object]
      # @return [Object] whatever the block returns on its first successful run
      def with_retry(method:, url:)
        attempt = 0
        max_attempts = @max_retries + 1

        begin
          attempt += 1
          yield
        rescue RateLimitedError, ServerError => e
          raise unless @enabled && attempt < max_attempts

          sleep_seconds = retry_sleep_seconds(e, attempt: attempt)
          warn_log(
            "Retrying #{method.to_s.upcase} #{url} in #{format("%.2f", sleep_seconds)}s " \
            "(attempt #{attempt}/#{max_attempts})"
          )
          sleep(sleep_seconds)
          retry
        end
      end

      private

      # Forward a message to the logger, if one was given and it can `warn`.
      def warn_log(message)
        return unless @logger.respond_to?(:warn)

        @logger.warn(message)
      end

      # Seconds to wait before the next attempt: the server-provided
      # `Retry-After` when usable, otherwise capped exponential backoff.
      def retry_sleep_seconds(error, attempt:)
        header_seconds = retry_after_seconds(error)
        return header_seconds if header_seconds

        # Exponential backoff with small jitter.
        exp = @backoff_base * (2**(attempt - 1))
        jitter = rand * 0.25
        # Never hand a negative interval to `sleep` (it raises ArgumentError).
        [[exp + jitter, @backoff_max].min, 0.0].max
      end

      # Parse a numeric `Retry-After` from a rate-limit error.
      #
      # @return [Float, nil] nil when the header is absent, not a plain number
      #   (e.g. an HTTP-date), negative, or not finite
      def retry_after_seconds(error)
        return nil unless error.is_a?(RateLimitedError)

        raw = retry_after_header(error.headers)
        # Some HTTP adapters expose header values as arrays; use the first one.
        raw = raw.first if raw.is_a?(Array)
        text = raw.to_s.strip
        return nil if text.empty?

        seconds = Float(text)
        seconds.finite? && !seconds.negative? ? seconds : nil
      rescue ArgumentError
        nil
      end

      # Look up the `Retry-After` value, tolerating any header-name casing.
      def retry_after_header(headers)
        return nil unless headers

        direct = headers[RETRY_AFTER_HEADER] || headers["Retry-After"]
        return direct if direct || !headers.respond_to?(:each_pair)

        match = headers.each_pair.find { |name, _value| name.to_s.casecmp?(RETRY_AFTER_HEADER) }
        match && match.last
      end
    end
  end
end
@@ -0,0 +1,217 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../response"
4
+ require_relative "../validation"
5
+
6
module PowoRuby
  module Resources
    # Endpoint wrapper around POWO's `/search` resource.
    #
    # Responsibilities:
    # - validate user input
    # - shape/normalize params for the API
    # - provide cursor-based enumerators
    #
    # This class is typically accessed via {PowoRuby::Client#search}.
    class Search
      DEFAULT_CURSOR = "*"
      DEFAULT_PER_PAGE = 24

      # @param request [PowoRuby::Request,#get] HTTP adapter used to call the API
      # @param allowed_params [Set<Symbol>] allow-list of supported params for this mode
      # @param group_keys [Array<Symbol>] keys that should be flattened when passed as grouped hashes
      def initialize(request:, allowed_params:, group_keys:)
        @request = request
        @allowed_params = allowed_params
        @group_keys = group_keys
      end

      # Perform a simple text search.
      #
      # @param query [String] the search query (mapped to `q`)
      # @param filters [Hash] optional filter hash (validated against the allow-list)
      # @param cursor [String] POWO cursor for pagination (default `*`)
      # @param per_page [Integer] page size (mapped to `perPage`)
      # @return [PowoRuby::Response]
      # @raise [PowoRuby::ValidationError] on a blank query or unsupported filter keys
      # @raise [ArgumentError] when `filters` contains `:page`
      #
      # @example
      #   response = client.search.query(query: "Acacia", filters: { accepted: true })
      #   response.total_count
      #   response.results
      def query(query:, filters: {}, cursor: DEFAULT_CURSOR, per_page: DEFAULT_PER_PAGE)
        Validation.presence!(query, name: "query")
        reject_page!(filters)

        params = normalize_filters(filters, name: "filters")
        # Explicit arguments win over same-named entries coming from `filters`.
        params["q"] = query.to_s
        params["cursor"] = normalize_cursor(cursor)
        params["perPage"] = Integer(per_page)

        Response.new(request.get("search", params: params))
      end

      # Perform an "advanced" search using a structured hash of terms.
      #
      # Supports both flat and grouped forms; grouped keys are flattened based on `group_keys`.
      # A key listed in `group_keys` whose value is not a Hash is treated as a flat term, so
      # a name that is both a group and a term can be used either way.
      #
      # @param params_hash [Hash] query terms / filters
      # @return [PowoRuby::Response]
      # @raise [PowoRuby::ValidationError] on a non-Hash argument or unsupported keys
      # @raise [ArgumentError] when the params contain `:page`
      #
      # @example (flat)
      #   client.search.advanced(family: "Fabaceae", accepted: true, limit: 24)
      #
      # @example (grouped)
      #   client.search.advanced(
      #     name: { genus: "Acacia", family: "Fabaceae" },
      #     accepted: true
      #   )
      def advanced(params_hash)
        Validation.hash!(params_hash, name: "params_hash")

        flat = flatten_groups(params_hash)
        reject_page!(flat)
        params = normalize_filters(flat, name: "params_hash")

        Response.new(request.get("search", params: params))
      end

      # Enumerate rows across cursor pages for a simple text search.
      #
      # Requests are issued lazily as the enumerator is consumed. Enumeration stops when a
      # page has no rows, the response reports no next page, or the cursor does not advance.
      #
      # @param query [String]
      # @param filters [Hash]
      # @param cursor [String]
      # @param per_page [Integer]
      # @return [Enumerator<Hash>]
      #
      # @example
      #   client.search.each(query: "Acacia", filters: { accepted: true }).take(50)
      def each(query:, filters: {}, cursor: DEFAULT_CURSOR, per_page: DEFAULT_PER_PAGE)
        Enumerator.new do |y|
          paginate(normalize_cursor(cursor), y) do |current_cursor|
            self.query(query: query, filters: filters, cursor: current_cursor, per_page: per_page)
          end
        end
      end

      # Enumerate rows across cursor pages for an advanced search.
      #
      # `limit` is treated as a page-size hint (mapped to `perPage`); an explicit `perPage`
      # entry takes precedence over `limit`. Stop conditions are the same as for {#each}.
      #
      # @param params_hash [Hash]
      # @return [Enumerator<Hash>]
      def advanced_each(params_hash)
        flat = flatten_groups(params_hash)
        reject_page!(flat)

        initial_cursor = normalize_cursor(flat[:cursor])
        per_page =
          if flat.key?(:perPage)
            Integer(flat[:perPage])
          elsif flat.key?(:limit)
            Integer(flat[:limit])
          else
            DEFAULT_PER_PAGE
          end
        # Cursor and page size are re-supplied on every call below.
        base_params = flat.reject { |key, _value| key == :cursor || key == :perPage }

        Enumerator.new do |y|
          paginate(initial_cursor, y) do |current_cursor|
            advanced(base_params.merge(cursor: current_cursor, limit: per_page))
          end
        end
      end

      private

      attr_reader :request, :allowed_params, :group_keys

      # Drive cursor pagination: fetch a page via the block, push its rows to
      # `yielder`, and continue until there is nothing more to fetch.
      def paginate(start_cursor, yielder)
        current_cursor = start_cursor

        loop do
          response = yield(current_cursor)
          row_count = 0
          response.each do |row|
            row_count += 1
            yielder << row
          end

          # An empty page means there is nothing further to enumerate.
          break if row_count.zero?
          break unless response.next_page?

          upcoming = next_cursor_from(response)
          # A missing or non-advancing cursor would repeat the same request forever.
          break if upcoming.nil? || upcoming == current_cursor

          current_cursor = upcoming
        end
      end

      # Cursor for the following page, or nil when the response carries no usable one.
      def next_cursor_from(response)
        raw = response.raw
        return nil unless raw.is_a?(Hash)

        cursor = (raw["cursor"] || raw[:cursor]).to_s
        cursor.strip.empty? || cursor == DEFAULT_CURSOR ? nil : cursor
      end

      # Page-based pagination is rejected up front with an explanatory error.
      def reject_page!(hashish)
        return unless hashish.is_a?(Hash)
        return unless hashish.key?(:page) || hashish.key?("page")

        raise ArgumentError,
              "POWO search no longer supports page-based pagination. Remove :page and use :cursor instead."
      end

      # Blank cursors fall back to the start-of-results cursor.
      def normalize_cursor(cursor)
        cursor.to_s.strip.empty? ? DEFAULT_CURSOR : cursor.to_s
      end

      # Coerce a hash key to a Symbol the same way everywhere in this class.
      def symbolize(key)
        key.is_a?(Symbol) ? key : key.to_s.to_sym
      end

      # Merge grouped sub-hashes into one flat, symbol-keyed hash.
      #
      # Only Hash values under a group key are expanded; anything else is kept as a
      # flat entry, which also makes flattening an already-flat hash a no-op.
      def flatten_groups(hash)
        Validation.hash!(hash, name: "params_hash")

        hash.each_with_object({}) do |(key, value), flat|
          sym_key = symbolize(key)
          if group_keys.include?(sym_key) && value.is_a?(Hash)
            value.each { |inner_key, inner_value| flat[symbolize(inner_key)] = inner_value }
          else
            flat[sym_key] = value
          end
        end
      end

      # Validate keys against the allow-list and translate them to API param names.
      #
      # @return [Hash{String => Object}] params ready to send; nil values are dropped
      # @raise [PowoRuby::ValidationError] when a key is not in the allow-list
      def normalize_filters(input_hash, name:)
        Validation.hash!(input_hash, name: name)

        unknown =
          input_hash.keys
                    .map { |k| symbolize(k) }
                    .reject { |k| allowed_params.include?(k) }
        unless unknown.empty?
          supported = allowed_params.to_a.sort.map(&:inspect).join(", ")
          message =
            "Unsupported parameter(s): #{unknown.map(&:inspect).join(", ")}. Supported: [#{supported}]"
          raise ValidationError, message
        end

        normalized = {}
        input_hash.each do |key, value|
          next if value.nil?

          sym_key = symbolize(key)
          case sym_key
          when :limit
            normalized["perPage"] = Integer(value)
          when :images
            Validation.boolean!(value, name: "images")
            # `images: false` adds no filter at all.
            normalized["f"] = "has_images" if value
          when :accepted
            Validation.boolean!(value, name: "accepted")
            normalized["accepted"] = value
          when :page
            normalized["page"] = Integer(value)
          else
            normalized[sym_key.to_s] = value
          end
        end

        normalized
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../response"
4
+ require_relative "../uri_utils"
5
+ require_relative "../validation"
6
+
7
module PowoRuby
  module Resources
    # Thin wrapper for POWO's `/taxon/<id>` endpoint.
    #
    # This class is typically accessed via {PowoRuby::Client#taxa}.
    class Taxa
      # @param request [PowoRuby::Request,#get] HTTP adapter used to call the API
      def initialize(request:)
        @request = request
      end

      # Fetch a single taxon (or IPNI record) by identifier.
      #
      # The identifier is stringified and URL-escaped before it is appended to the path.
      #
      # @param id [String] POWO/IPNI identifier (often a URN/LSID)
      # @return [PowoRuby::Response]
      #
      # @example
      #   response = client.taxa.lookup("urn:lsid:ipni.org:names:30000618-2")
      #   response.raw #=> Hash
      def lookup(id)
        Validation.presence!(id, name: "id")

        path = "taxon/" + URIUtils.escape_path_segment(id.to_s)
        payload = @request.get(path, params: {})
        Response.new(payload)
      end
    end
  end
end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
module PowoRuby
  # Lenient wrapper around a decoded POWO JSON payload.
  #
  # For search-like payloads it exposes `results`, `total_count`, and a pagination check.
  # Both String and Symbol keys are accepted; String keys are tried first.
  #
  # Note: POWO's API schema is not formally documented; this class keeps parsing conservative.
  class Response
    include Enumerable

    # @return [Hash, Array, Object] the payload exactly as given to the constructor
    attr_reader :raw

    # @param raw [Hash, Array, Object] parsed JSON (or already-decoded object) from the API
    def initialize(raw)
      @raw = raw
    end

    # Result rows for search-like responses.
    #
    # @return [Array<Hash>] empty unless the payload is a Hash holding an Array under `results`
    def results
      return [] unless raw.is_a?(Hash)

      rows = first_present("results", :results)
      rows.is_a?(Array) ? rows : []
    end

    # Total result count when present (`totalResults`, then `total`).
    #
    # @return [Integer, nil]
    def total_count
      return nil unless raw.is_a?(Hash)

      first_present("totalResults", :totalResults, "total", :total)
    end

    # Whether another page is available.
    #
    # Page counters (`page` with `totalPages`/`pages`) take precedence when both are
    # present; otherwise a cursor other than `*` or the empty string signals more data.
    #
    # @return [Boolean]
    def next_page?
      return false unless raw.is_a?(Hash)

      page = first_present("page", :page)
      total_pages = first_present("totalPages", :totalPages, "pages", :pages)
      cursor = first_present("cursor", :cursor)

      return page.to_i < total_pages.to_i if page && total_pages
      return false unless cursor

      token = cursor.to_s
      !(token == "*" || token.empty?)
    end

    # Iterate over {#results}; returns an Enumerator when no block is given.
    def each(&block)
      block ? results.each(&block) : enum_for(:each)
    end

    private

    # First truthy value among the given keys, tried in order (short-circuiting).
    def first_present(*keys)
      keys.reduce(nil) { |found, key| found || raw[key] }
    end
  end
end
@@ -0,0 +1,149 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
4
+
5
module PowoRuby
  # Loads and exposes allow-lists of supported query parameters for POWO and IPNI.
  #
  # The allow-lists can be sourced from `docs/POWO_SEARCH_TERMS.md` (parsed at runtime) or,
  # if that file can't be read or yields no terms, from built-in fallback lists.
  class Terms
    # Built-in fallback terms, one constant per group.
    POWO_NAME = %i[
      full_name scientific_name genus species infraspecific family common_name author rank status
    ].freeze
    POWO_CHARACTERISTIC = %i[
      summary appearance flower fruit leaf habit habitat use conservation
    ].freeze
    POWO_GEOGRAPHY = %i[
      distribution native_distribution introduced_distribution region continent country
    ].freeze
    POWO_ADDITIONAL = %i[
      accepted images page limit sort
    ].freeze

    IPNI_NAME = %i[
      genus species infraspecific_rank infraspecific_name family publication_year full_name
    ].freeze
    IPNI_AUTHOR = %i[
      author standard_form collaboration
    ].freeze
    IPNI_PUBLICATION = %i[
      publication_title publication_year publication_place publisher
    ].freeze

    # Lower-cased `## ` header text => group key, per section.
    POWO_GROUP_HEADERS = {
      "name terms" => :name,
      "characteristic terms" => :characteristic,
      "geography terms" => :geography,
      "additional filters" => :additional
    }.freeze

    IPNI_GROUP_HEADERS = {
      "name terms" => :name,
      "author terms" => :author,
      "publication terms" => :publication
    }.freeze

    # Load terms from a markdown file when possible, otherwise fall back to defaults.
    #
    # @param path [String] path to `POWO_SEARCH_TERMS.md`
    # @return [PowoRuby::Terms]
    def self.load(path)
      parse_markdown(path) || new(source_path: path)
    end

    # Parse a terms markdown file.
    #
    # The file starts in the POWO section; the exact line `# IPNI Search Terms` switches to
    # the IPNI section. `## <header>` lines select a group (unrecognized headers select
    # none) and `- <term>` bullets add the first word of the bullet to the current group.
    # Markdown decoration around that word (backticks, `**`, a trailing colon) is dropped.
    #
    # A section without any recognized term uses the built-in defaults for that section.
    #
    # @param path [String]
    # @return [PowoRuby::Terms, nil] nil if the file is unreadable, blank, or has no terms
    def self.parse_markdown(path)
      return nil unless path && File.file?(path)

      content = File.read(path)
      return nil if content.strip.empty?

      section = :powo
      group = nil
      collected = { powo: {}, ipni: {} }

      content.each_line do |raw_line|
        line = raw_line.strip
        next if line.empty?

        if line == "# IPNI Search Terms"
          section = :ipni
          group = nil
        elsif line.start_with?("## ")
          headers = section == :powo ? POWO_GROUP_HEADERS : IPNI_GROUP_HEADERS
          group = headers[line.delete_prefix("## ").strip.downcase]
        elsif group && line.start_with?("- ")
          term = extract_term(line)
          (collected[section][group] ||= []) << term if term
        end
      end

      powo = freeze_groups(collected[:powo])
      ipni = freeze_groups(collected[:ipni])
      # Nothing usable in the file: let the caller fall back to the defaults.
      return nil unless powo || ipni

      new(source_path: path, powo: powo, ipni: ipni)
    rescue Errno::ENOENT, Errno::EACCES
      nil
    end

    # First word of a `- ` bullet as a lower-cased Symbol, or nil when there is none.
    def self.extract_term(line)
      token = line.delete_prefix("- ").strip.split(/\s+/).first
      return nil if token.nil?

      # Trim markdown punctuation from both ends; inner characters are left alone.
      cleaned = token.gsub(/\A[^[:alnum:]_]+|[^[:alnum:]_]+\z/, "")
      cleaned.empty? ? nil : cleaned.downcase.to_sym
    end
    private_class_method :extract_term

    # De-duplicate and freeze parsed groups; nil when no group was collected.
    def self.freeze_groups(groups)
      return nil if groups.empty?

      groups.transform_values { |terms| terms.uniq.freeze }.freeze
    end
    private_class_method :freeze_groups

    # @param source_path [String, nil] where the terms were (or would have been) loaded from
    # @param powo [Hash{Symbol => Array<Symbol>}, nil] POWO groups; nil selects the defaults
    # @param ipni [Hash{Symbol => Array<Symbol>}, nil] IPNI groups; nil selects the defaults
    def initialize(source_path:, powo: nil, ipni: nil)
      @source_path = source_path
      @powo = powo || {
        name: POWO_NAME,
        characteristic: POWO_CHARACTERISTIC,
        geography: POWO_GEOGRAPHY,
        additional: POWO_ADDITIONAL
      }.freeze
      @ipni = ipni || {
        name: IPNI_NAME,
        author: IPNI_AUTHOR,
        publication: IPNI_PUBLICATION
      }.freeze
    end

    attr_reader :source_path

    # Allow-list of supported POWO params as symbols.
    #
    # @return [Set<Symbol>]
    def powo_allowed_params
      @powo.values.flatten.to_set
    end

    # Allow-list of supported IPNI params as symbols.
    #
    # @return [Set<Symbol>]
    def ipni_allowed_params
      @ipni.values.flatten.to_set
    end
  end
end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "uri"
4
+
5
module PowoRuby
  # Small URI helper utilities.
  module URIUtils
    # Matches every character other than ASCII letters, digits, `-`, `.`, `_` and `~`.
    PATH_SEGMENT_UNSAFE = /[^A-Za-z0-9\-._~]/.freeze
    private_constant :PATH_SEGMENT_UNSAFE

    module_function

    # Percent-encode a value so it can be used as a single URL path segment.
    #
    # This is used for `/taxon/<id>` lookups, where IDs can include characters like `/` or `:`.
    #
    # @param value [String]
    # @return [String]
    def escape_path_segment(value)
      URI::DEFAULT_PARSER.escape(value, PATH_SEGMENT_UNSAFE)
    end
  end
end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
module PowoRuby
  # Minimal argument checks shared by the public API.
  #
  # Each helper returns nil when the value is acceptable and raises
  # {PowoRuby::ValidationError} otherwise, so endpoint methods fail fast.
  module Validation
    module_function

    # Require a value that is neither nil nor blank once stringified and stripped.
    #
    # @param value [Object]
    # @param name [String] parameter name for error messages
    # @return [void]
    # @raise [PowoRuby::ValidationError]
    def presence!(value, name:)
      blank = value.nil? || value.to_s.strip.empty?
      raise ValidationError, "#{name} must be provided" if blank
    end

    # Require a Hash.
    #
    # @param value [Object]
    # @param name [String]
    # @return [void]
    # @raise [PowoRuby::ValidationError]
    def hash!(value, name:)
      raise ValidationError, "#{name} must be a Hash" unless value.is_a?(Hash)
    end

    # Require exactly `true` or `false` (truthy/falsy stand-ins are rejected).
    #
    # @param value [Object]
    # @param name [String]
    # @return [void]
    # @raise [PowoRuby::ValidationError]
    def boolean!(value, name:)
      case value
      when true, false
        nil
      else
        raise ValidationError, "#{name} must be boolean (true/false)"
      end
    end
  end
end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
module PowoRuby
  # Version string of the powo_ruby gem.
  VERSION = "0.1.0"
end