powo_ruby 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rubocop.yml +38 -0
- data/CHANGELOG.md +5 -0
- data/CODE_OF_CONDUCT.md +132 -0
- data/LICENSE.txt +21 -0
- data/README.md +201 -0
- data/Rakefile +12 -0
- data/docs/POWO_SEARCH_TERMS.md +63 -0
- data/exe/powo_ruby +134 -0
- data/lib/powo_ruby/client.rb +121 -0
- data/lib/powo_ruby/client_resolver.rb +37 -0
- data/lib/powo_ruby/configuration.rb +69 -0
- data/lib/powo_ruby/errors.rb +52 -0
- data/lib/powo_ruby/paginator.rb +41 -0
- data/lib/powo_ruby/request.rb +146 -0
- data/lib/powo_ruby/request_support/cache_key_builder.rb +80 -0
- data/lib/powo_ruby/request_support/cache_store.rb +37 -0
- data/lib/powo_ruby/request_support/response_handler.rb +82 -0
- data/lib/powo_ruby/request_support/retry_policy.rb +86 -0
- data/lib/powo_ruby/resources/search.rb +217 -0
- data/lib/powo_ruby/resources/taxa.rb +36 -0
- data/lib/powo_ruby/response.rb +71 -0
- data/lib/powo_ruby/terms.rb +149 -0
- data/lib/powo_ruby/uri_utils.rb +20 -0
- data/lib/powo_ruby/validation.rb +44 -0
- data/lib/powo_ruby/version.rb +6 -0
- data/lib/powo_ruby.rb +109 -0
- data/sig/powo_ruby.rbs +6 -0
- metadata +85 -0
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../errors"
|
|
4
|
+
|
|
5
|
+
module PowoRuby
|
|
6
|
+
module RequestSupport
|
|
7
|
+
# Encapsulates retry behavior for HTTP calls.
|
|
8
|
+
#
|
|
9
|
+
# This is used by {PowoRuby::Request} to retry on:
|
|
10
|
+
# - {PowoRuby::RateLimitedError} (HTTP 429)
|
|
11
|
+
# - {PowoRuby::ServerError} (HTTP 5xx)
|
|
12
|
+
#
|
|
13
|
+
# Retries use exponential backoff with a small jitter and respect `Retry-After` when present.
|
|
14
|
+
class RetryPolicy
  # Build a retry policy.
  #
  # @param enabled [Boolean] enable retry behavior (any truthy value enables it)
  # @param max_retries [Integer] number of retries (not counting the first attempt)
  # @param backoff_base [Numeric] base seconds for backoff
  # @param backoff_max [Numeric] max seconds for backoff
  # @param logger [#warn, nil] optional logger
  # @raise [ArgumentError, TypeError] if a numeric option cannot be coerced
  def initialize(enabled:, max_retries:, backoff_base:, backoff_max:, logger: nil)
    @enabled = enabled ? true : false
    @max_retries = Integer(max_retries)
    @backoff_base = Float(backoff_base)
    @backoff_max = Float(backoff_max)
    @logger = logger
  end

  # Execute a block, retrying it when it raises RateLimitedError or ServerError.
  #
  # The block is run at most `max_retries + 1` times. When retries are disabled, or the
  # attempts are exhausted, the last error is re-raised unchanged.
  #
  # @param method [Symbol, String] HTTP method, used only in the log message
  # @param url [String] request URL, used only in the log message
  # @yieldreturn [Object]
  # @return [Object] whatever the block returns on its first successful run
  def with_retry(method:, url:)
    attempt = 0
    max_attempts = @max_retries + 1

    begin
      attempt += 1
      yield
    rescue RateLimitedError, ServerError => e
      # Bare `raise` re-raises the rescued error as-is.
      raise unless @enabled && attempt < max_attempts

      sleep_seconds = retry_sleep_seconds(e, attempt: attempt)
      warn_log(
        "Retrying #{method.to_s.upcase} #{url} in #{format("%.2f", sleep_seconds)}s " \
        "(attempt #{attempt}/#{max_attempts})"
      )
      sleep(sleep_seconds)
      retry
    end
  end

  private

  # Send a message to the logger, if one was given and it responds to `warn`.
  def warn_log(message)
    return unless @logger
    return unless @logger.respond_to?(:warn)

    @logger.warn(message)
  end

  # Seconds to wait before the next attempt.
  #
  # A usable `Retry-After` header wins; otherwise exponential backoff with a small jitter,
  # capped at `backoff_max`.
  #
  # @param error [Exception] the error that triggered the retry
  # @param attempt [Integer] 1-based number of the attempt that just failed
  # @return [Float] a non-negative number of seconds
  def retry_sleep_seconds(error, attempt:)
    header_seconds = retry_after_seconds(error)
    return header_seconds if header_seconds

    exp = @backoff_base * (2**(attempt - 1))
    jitter = rand * 0.25
    # Kernel#sleep raises on a negative interval, so floor the result at zero
    # in case the policy was configured with a negative base or max.
    [[exp + jitter, @backoff_max].min, 0.0].max
  end

  # Parse the `Retry-After` header of a rate-limit error as a number of seconds.
  #
  # @param error [Exception]
  # @return [Float, nil] nil when the header is absent, not numeric (e.g. an HTTP date),
  #   negative, or not finite, so the caller falls back to exponential backoff
  def retry_after_seconds(error)
    return nil unless error.is_a?(RateLimitedError)

    headers = error.headers
    raw = headers && (headers["retry-after"] || headers["Retry-After"])
    return nil if raw.to_s.strip.empty?

    seconds = Float(raw)
    # A negative or infinite delay would make Kernel#sleep raise (or never return),
    # turning a retryable error into an unrelated failure.
    seconds.finite? && !seconds.negative? ? seconds : nil
  rescue ArgumentError, TypeError
    # ArgumentError: non-numeric text; TypeError: a value Float() cannot convert (e.g. an Array).
    nil
  end
end
|
|
85
|
+
end
|
|
86
|
+
end
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../response"
|
|
4
|
+
require_relative "../validation"
|
|
5
|
+
|
|
6
|
+
module PowoRuby
|
|
7
|
+
module Resources
|
|
8
|
+
# Endpoint wrapper around POWO's `/search` resource.
|
|
9
|
+
#
|
|
10
|
+
# Responsibilities:
|
|
11
|
+
# - validate user input
|
|
12
|
+
# - shape/normalize params for the API
|
|
13
|
+
# - provide cursor-based enumerators
|
|
14
|
+
#
|
|
15
|
+
# This class is typically accessed via {PowoRuby::Client#search}.
|
|
16
|
+
class Search
  DEFAULT_CURSOR = "*"
  DEFAULT_PER_PAGE = 24

  # @param request [PowoRuby::Request,#get] HTTP adapter used to call the API
  # @param allowed_params [Set<Symbol>] allow-list of supported params for this mode
  # @param group_keys [Array<Symbol>] keys that should be flattened when passed as grouped hashes
  def initialize(request:, allowed_params:, group_keys:)
    @request = request
    @allowed_params = allowed_params
    @group_keys = group_keys
  end

  # Perform a simple text search.
  #
  # @param query [String] the search query (mapped to `q`)
  # @param filters [Hash] optional filter hash (validated against the allow-list)
  # @param cursor [String] POWO cursor for pagination (default `*`)
  # @param per_page [Integer] page size (mapped to `perPage`)
  # @return [PowoRuby::Response]
  # @raise [ArgumentError] if `filters` contains a `page` key
  #
  # @example
  #   response = client.search.query(query: "Acacia", filters: { accepted: true })
  #   response.total_count
  #   response.results
  def query(query:, filters: {}, cursor: DEFAULT_CURSOR, per_page: DEFAULT_PER_PAGE)
    Validation.presence!(query, name: "query")
    reject_page!(filters)

    params = normalize_filters(filters, name: "filters")
    # The explicit keyword arguments win over anything the filters produced.
    params["q"] = query.to_s
    params["cursor"] = normalize_cursor(cursor)
    params["perPage"] = Integer(per_page)

    Response.new(request.get("search", params: params))
  end

  # Perform an "advanced" search using a structured hash of terms.
  #
  # Supports both flat and grouped forms; grouped keys are flattened based on `group_keys`.
  #
  # @param params_hash [Hash] query terms / filters
  # @return [PowoRuby::Response]
  # @raise [ArgumentError] if the terms contain a `page` key
  #
  # @example (flat)
  #   client.search.advanced(family: "Fabaceae", accepted: true, limit: 24)
  #
  # @example (grouped)
  #   client.search.advanced(
  #     name: { genus: "Acacia", family: "Fabaceae" },
  #     accepted: true
  #   )
  def advanced(params_hash)
    # flatten_groups validates that params_hash is a Hash.
    flat = flatten_groups(params_hash)
    reject_page!(flat)
    params = normalize_filters(flat, name: "params_hash")

    Response.new(request.get("search", params: params))
  end

  # Enumerate rows across cursor pages for a simple text search.
  #
  # Pages are fetched lazily, one request per page, as the enumerator is consumed.
  #
  # @param query [String]
  # @param filters [Hash]
  # @param cursor [String]
  # @param per_page [Integer]
  # @return [Enumerator<Hash>]
  #
  # @example
  #   client.search.each(query: "Acacia", filters: { accepted: true }).take(50)
  def each(query:, filters: {}, cursor: DEFAULT_CURSOR, per_page: DEFAULT_PER_PAGE)
    paginate(cursor) do |current_cursor|
      # `self.` is required: the `query` keyword argument shadows the method name.
      self.query(query: query, filters: filters, cursor: current_cursor, per_page: per_page)
    end
  end

  # Enumerate rows across cursor pages for an advanced search.
  #
  # `limit` is treated as a page-size hint (mapped to `perPage`). A nil `perPage` or
  # `limit` is treated as "not given", matching how {#advanced} skips nil values.
  #
  # @param params_hash [Hash]
  # @return [Enumerator<Hash>]
  def advanced_each(params_hash)
    flat = flatten_groups(params_hash)
    reject_page!(flat)

    per_page = Integer(flat[:perPage] || flat[:limit] || DEFAULT_PER_PAGE)
    # Cursor and page size are re-supplied on every request below.
    base_params = flat.reject { |key, _value| %i[cursor perPage].include?(key) }

    paginate(flat[:cursor]) do |current_cursor|
      advanced(base_params.merge(cursor: current_cursor, limit: per_page))
    end
  end

  private

  attr_reader :request, :allowed_params, :group_keys

  # Build a lazy enumerator that walks cursor pages.
  #
  # @param initial_cursor [String, nil] starting cursor (blank means DEFAULT_CURSOR)
  # @yieldparam cursor [String] cursor of the page to fetch
  # @yieldreturn [PowoRuby::Response] the fetched page
  # @return [Enumerator<Hash>]
  def paginate(initial_cursor, &fetch_page)
    Enumerator.new do |yielder|
      current_cursor = normalize_cursor(initial_cursor)

      loop do
        response = fetch_page.call(current_cursor)
        response.each { |row| yielder << row }

        break unless response.next_page?

        next_cursor = next_cursor_from(response)
        # Stop when there is no usable cursor, or when the server hands back the cursor
        # we just used; following it would refetch the same page forever.
        break if next_cursor.nil? || next_cursor == current_cursor

        current_cursor = next_cursor
      end
    end
  end

  # Extract the follow-up cursor from a response.
  #
  # @return [String, nil] nil when the raw payload is not a Hash, or the cursor is blank
  #   or equal to DEFAULT_CURSOR
  def next_cursor_from(response)
    raw = response.raw
    return nil unless raw.is_a?(Hash)

    cursor = (raw["cursor"] || raw[:cursor]).to_s
    cursor.strip.empty? || cursor == DEFAULT_CURSOR ? nil : cursor
  end

  # Refuse page-based pagination; only cursors are supported.
  def reject_page!(hashish)
    return unless hashish.is_a?(Hash)
    return unless hashish.key?(:page) || hashish.key?("page")

    raise ArgumentError,
          "POWO search no longer supports page-based pagination. Remove :page and use :cursor instead."
  end

  # Blank or nil cursors mean "start from the beginning".
  def normalize_cursor(cursor)
    cursor.to_s.strip.empty? ? DEFAULT_CURSOR : cursor.to_s
  end

  # Symbolize top-level keys and merge the contents of grouped hashes into one flat hash.
  #
  # @param hash [Hash]
  # @return [Hash{Symbol => Object}]
  def flatten_groups(hash)
    Validation.hash!(hash, name: "params_hash")

    flat = {}
    hash.each do |key, value|
      sym_key = key.is_a?(Symbol) ? key : key.to_s.to_sym
      if group_keys.include?(sym_key)
        Validation.hash!(value, name: sym_key.to_s)
        value.each { |k, v| flat[k.to_sym] = v }
      else
        flat[sym_key] = value
      end
    end
    flat
  end

  # Validate keys against the allow-list and translate them into API query params.
  #
  # Nil values are dropped. `limit` becomes `perPage`; `images: true` becomes `f=has_images`.
  #
  # @param input_hash [Hash]
  # @param name [String] argument name used in error messages
  # @return [Hash{String => Object}]
  # @raise [ValidationError] on unknown keys or non-boolean `images` / `accepted`
  def normalize_filters(input_hash, name:)
    Validation.hash!(input_hash, name: name)

    unknown =
      input_hash.keys
                .map { |k| k.is_a?(Symbol) ? k : k.to_s.to_sym }
                .reject { |k| allowed_params.include?(k) }
    unless unknown.empty?
      supported = allowed_params.to_a.sort.map(&:inspect).join(", ")
      message =
        "Unsupported parameter(s): #{unknown.map(&:inspect).join(", ")}. Supported: [#{supported}]"
      raise ValidationError, message
    end

    normalized = {}
    input_hash.each do |key, value|
      sym_key = key.is_a?(Symbol) ? key : key.to_s.to_sym
      next if value.nil?

      case sym_key
      when :limit
        normalized["perPage"] = Integer(value)
      when :images
        Validation.boolean!(value, name: "images")
        normalized["f"] = "has_images" if value
      when :accepted
        Validation.boolean!(value, name: "accepted")
        normalized["accepted"] = value
      when :page
        normalized["page"] = Integer(value)
      else
        normalized[sym_key.to_s] = value
      end
    end

    normalized
  end
end
|
|
216
|
+
end
|
|
217
|
+
end
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../response"
|
|
4
|
+
require_relative "../uri_utils"
|
|
5
|
+
require_relative "../validation"
|
|
6
|
+
|
|
7
|
+
module PowoRuby
|
|
8
|
+
module Resources
|
|
9
|
+
# Endpoint wrapper around POWO's `/taxon/<id>` resource.
|
|
10
|
+
#
|
|
11
|
+
# This class is typically accessed via {PowoRuby::Client#taxa}.
|
|
12
|
+
class Taxa
  # @param request [PowoRuby::Request,#get] HTTP adapter used to call the API
  def initialize(request:)
    @request = request
  end

  # Fetch a single taxon (or IPNI record) by identifier.
  #
  # The identifier is stringified and escaped as one path segment, then requested
  # from `taxon/<escaped id>` with no query parameters.
  #
  # @param id [String] POWO/IPNI identifier (often a URN/LSID)
  # @return [PowoRuby::Response] wrapper around whatever the request adapter returned
  # @raise [PowoRuby::ValidationError] if `id` is nil or blank
  #
  # @example
  #   response = client.taxa.lookup("urn:lsid:ipni.org:names:30000618-2")
  #   response.raw #=> Hash
  def lookup(id)
    Validation.presence!(id, name: "id")

    escaped_id = URIUtils.escape_path_segment(id.to_s)
    path = "taxon/#{escaped_id}"
    payload = @request.get(path, params: {})

    Response.new(payload)
  end
end
|
|
35
|
+
end
|
|
36
|
+
end
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module PowoRuby
|
|
4
|
+
# Schema-flexible wrapper for POWO JSON responses.
|
|
5
|
+
#
|
|
6
|
+
# For search-like responses this exposes `results`, `total_count`, and pagination helpers.
|
|
7
|
+
#
|
|
8
|
+
# Note: POWO's API schema is not formally documented; this class keeps parsing conservative.
|
|
9
|
+
class Response
  include Enumerable

  # @return [Hash, Array, Object] the payload exactly as it was given to the constructor
  attr_reader :raw

  # @param raw [Hash, Array, Object] parsed JSON (or already-decoded object) from the API
  def initialize(raw)
    @raw = raw
  end

  # Result rows for search-like responses.
  #
  # @return [Array<Hash>] the `results` entry when it is an Array, otherwise an empty Array
  def results
    rows = lookup("results")
    rows.is_a?(Array) ? rows : []
  end

  # Total result count when present, read from `totalResults` or, failing that, `total`.
  #
  # @return [Integer, nil] the stored value as-is; nil when absent or when raw is not a Hash
  def total_count
    lookup("totalResults") || lookup("total")
  end

  # Whether another page is available.
  #
  # When both a page number and a page total are present they decide the answer;
  # otherwise a cursor that is neither empty nor `*` means more pages.
  #
  # @return [Boolean]
  def next_page?
    return false unless raw.is_a?(Hash)

    page = lookup("page")
    total_pages = lookup("totalPages") || lookup("pages")
    cursor = lookup("cursor")

    return page.to_i < total_pages.to_i if page && total_pages
    return false unless cursor

    token = cursor.to_s
    token != "*" && !token.empty?
  end

  # Iterate over the result rows.
  #
  # @yieldparam row [Hash]
  # @return [Enumerator, Array<Hash>] an Enumerator when called without a block
  def each(&block)
    block ? results.each(&block) : enum_for(:each)
  end

  private

  # Read a key from the raw payload, trying the String form first and then the Symbol form.
  #
  # @param key [String]
  # @return [Object, nil] nil when raw is not a Hash
  def lookup(key)
    return nil unless raw.is_a?(Hash)

    raw[key] || raw[key.to_sym]
  end
end
|
|
71
|
+
end
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "set"
|
|
4
|
+
|
|
5
|
+
module PowoRuby
|
|
6
|
+
# Loads and exposes allow-lists of supported query parameters for POWO and IPNI.
|
|
7
|
+
#
|
|
8
|
+
# The allow-lists can be sourced from `docs/POWO_SEARCH_TERMS.md` (parsed at runtime) or,
|
|
9
|
+
# if that file can't be read, from built-in fallback lists.
|
|
10
|
+
class Terms
  # Built-in fallback allow-lists, used when a section cannot be read from the markdown file.
  POWO_NAME = %i[
    full_name scientific_name genus species infraspecific family common_name author rank status
  ].freeze
  POWO_CHARACTERISTIC = %i[
    summary appearance flower fruit leaf habit habitat use conservation
  ].freeze
  POWO_GEOGRAPHY = %i[
    distribution native_distribution introduced_distribution region continent country
  ].freeze
  POWO_ADDITIONAL = %i[
    accepted images page limit sort
  ].freeze

  IPNI_NAME = %i[
    genus species infraspecific_rank infraspecific_name family publication_year full_name
  ].freeze
  IPNI_AUTHOR = %i[
    author standard_form collaboration
  ].freeze
  IPNI_PUBLICATION = %i[
    publication_title publication_year publication_place publisher
  ].freeze

  # Lower-cased `## ` headings recognised in the POWO part of the markdown file.
  POWO_GROUP_HEADERS = {
    "name terms" => :name,
    "characteristic terms" => :characteristic,
    "geography terms" => :geography,
    "additional filters" => :additional
  }.freeze

  # Lower-cased `## ` headings recognised after the `# IPNI Search Terms` heading.
  IPNI_GROUP_HEADERS = {
    "name terms" => :name,
    "author terms" => :author,
    "publication terms" => :publication
  }.freeze

  # Load terms from a markdown file when possible, otherwise fall back to defaults.
  #
  # @param path [String] path to `POWO_SEARCH_TERMS.md`
  # @return [PowoRuby::Terms]
  def self.load(path)
    parsed = parse_markdown(path)
    parsed || new(source_path: path)
  end

  # Parse a terms markdown file.
  #
  # Lines are read in order: everything before the exact heading `# IPNI Search Terms`
  # belongs to POWO, everything after it to IPNI. A `## ` heading selects the current group
  # (unrecognised headings select none), and each `- ` bullet under a recognised group
  # contributes its first whitespace-delimited word, lower-cased, as a term.
  #
  # A section that yields no terms is left to the built-in defaults rather than becoming
  # an empty allow-list that would reject every parameter.
  #
  # @param path [String]
  # @return [PowoRuby::Terms, nil] nil if the file is unreadable, blank, or has no recognised terms
  def self.parse_markdown(path)
    return nil unless path && File.file?(path)

    content = File.read(path)
    return nil if content.strip.empty?

    section = :powo
    group = nil
    headers = { powo: POWO_GROUP_HEADERS, ipni: IPNI_GROUP_HEADERS }
    terms = {
      powo: Hash.new { |h, k| h[k] = [] },
      ipni: Hash.new { |h, k| h[k] = [] }
    }

    content.each_line do |line|
      line = line.strip
      next if line.empty?

      if line == "# IPNI Search Terms"
        section = :ipni
        group = nil
      elsif line.start_with?("## ")
        group = headers.fetch(section)[line.delete_prefix("## ").downcase]
      elsif group && line.start_with?("- ")
        term = line.delete_prefix("- ").strip.split(/\s+/).first
        terms.fetch(section)[group] << term.downcase.to_sym unless term.nil? || term.empty?
      end
    end

    powo = freeze_groups(terms[:powo])
    ipni = freeze_groups(terms[:ipni])
    return nil unless powo || ipni

    new(source_path: path, powo: powo, ipni: ipni)
  rescue Errno::ENOENT, Errno::EACCES
    # The file vanished or became unreadable between the File.file? check and the read.
    nil
  end

  # De-duplicate and freeze parsed groups.
  #
  # @param groups [Hash{Symbol => Array<Symbol>}]
  # @return [Hash{Symbol => Array<Symbol>}, nil] nil when nothing was parsed, so that
  #   {#initialize} substitutes the defaults
  def self.freeze_groups(groups)
    return nil if groups.empty?

    groups.transform_values { |v| v.uniq.freeze }.freeze
  end
  private_class_method :freeze_groups

  # @param source_path [String, nil] where the terms were (or would have been) loaded from
  # @param powo [Hash{Symbol => Array<Symbol>}, nil] POWO groups; nil selects the built-in lists
  # @param ipni [Hash{Symbol => Array<Symbol>}, nil] IPNI groups; nil selects the built-in lists
  def initialize(source_path:, powo: nil, ipni: nil)
    @source_path = source_path
    @powo = powo || {
      name: POWO_NAME,
      characteristic: POWO_CHARACTERISTIC,
      geography: POWO_GEOGRAPHY,
      additional: POWO_ADDITIONAL
    }.freeze
    @ipni = ipni || {
      name: IPNI_NAME,
      author: IPNI_AUTHOR,
      publication: IPNI_PUBLICATION
    }.freeze
  end

  attr_reader :source_path

  # Allow-list of supported POWO params as symbols.
  #
  # @return [Set<Symbol>] a new Set on every call
  def powo_allowed_params
    @powo.values.flatten.to_set
  end

  # Allow-list of supported IPNI params as symbols.
  #
  # @return [Set<Symbol>] a new Set on every call
  def ipni_allowed_params
    @ipni.values.flatten.to_set
  end
end
|
|
149
|
+
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "uri"
|
|
4
|
+
|
|
5
|
+
module PowoRuby
|
|
6
|
+
# Small URI helper utilities.
|
|
7
|
+
module URIUtils
  module_function

  # Percent-encode a value so it can be used as a single URL path segment.
  #
  # Every character other than ASCII letters, digits, `-`, `.`, `_` and `~` is escaped,
  # so separators such as `/` and `:` in `/taxon/<id>` identifiers are encoded too.
  #
  # @param value [String]
  # @return [String]
  def escape_path_segment(value)
    unsafe_characters = /[^A-Za-z0-9\-._~]/
    parser = URI::DEFAULT_PARSER

    parser.escape(value, unsafe_characters)
  end
end
|
|
20
|
+
end
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module PowoRuby
|
|
4
|
+
# Tiny validation helpers used by the public API.
|
|
5
|
+
#
|
|
6
|
+
# These are intentionally small and strict: they fail fast with {PowoRuby::ValidationError}
|
|
7
|
+
# to keep endpoint methods predictable for callers.
|
|
8
|
+
module Validation
  module_function

  # Require a value that is neither nil nor blank once stringified and stripped.
  #
  # @param value [Object]
  # @param name [String] parameter name for error messages
  # @return [void]
  # @raise [PowoRuby::ValidationError] when the value is nil or blank
  def presence!(value, name:)
    blank = value.nil? || value.to_s.strip.empty?
    raise ValidationError, "#{name} must be provided" if blank
  end

  # Require a Hash.
  #
  # @param value [Object]
  # @param name [String]
  # @return [void]
  # @raise [PowoRuby::ValidationError] when the value is not a Hash
  def hash!(value, name:)
    raise ValidationError, "#{name} must be a Hash" unless value.is_a?(Hash)
  end

  # Require a literal boolean; truthy or falsy stand-ins such as nil, 0 or "true" are rejected.
  #
  # @param value [Object]
  # @param name [String]
  # @return [void]
  # @raise [PowoRuby::ValidationError] when the value is neither true nor false
  def boolean!(value, name:)
    case value
    when true, false
      nil
    else
      raise ValidationError, "#{name} must be boolean (true/false)"
    end
  end
end
|
|
44
|
+
end
|