rospatent 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +16 -0
- data/LICENSE.txt +21 -0
- data/README.md +1247 -0
- data/lib/generators/rospatent/install/install_generator.rb +21 -0
- data/lib/generators/rospatent/install/templates/README +29 -0
- data/lib/generators/rospatent/install/templates/initializer.rb +24 -0
- data/lib/rospatent/cache.rb +282 -0
- data/lib/rospatent/client.rb +698 -0
- data/lib/rospatent/configuration.rb +136 -0
- data/lib/rospatent/errors.rb +127 -0
- data/lib/rospatent/input_validator.rb +306 -0
- data/lib/rospatent/logger.rb +286 -0
- data/lib/rospatent/patent_parser.rb +141 -0
- data/lib/rospatent/railtie.rb +26 -0
- data/lib/rospatent/search.rb +222 -0
- data/lib/rospatent/version.rb +5 -0
- data/lib/rospatent.rb +117 -0
- metadata +167 -0
@@ -0,0 +1,698 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "digest"
require "json"
require "securerandom"

require "faraday"
require "faraday/retry"
require "faraday/follow_redirects"

require_relative "input_validator"
require_relative "cache"
require_relative "logger"
|
10
|
+
|
11
|
+
module Rospatent
|
12
|
+
# Main client for interacting with the Rospatent API
|
13
|
+
class Client
|
14
|
+
include InputValidator
|
15
|
+
# Create a new client instance
|
16
|
+
# @param token [String] JWT token for authentication (optional if set in configuration)
|
17
|
+
# @param logger [Rospatent::Logger] Custom logger instance (optional)
|
18
|
+
# @param cache [Rospatent::Cache] Custom cache instance (optional)
|
19
|
+
def initialize(token: nil, logger: nil, cache: nil)
  # An explicitly passed token takes precedence over the globally configured one.
  resolved_token = token || Rospatent.configuration.token
  raise Errors::MissingTokenError, "API token is required" unless resolved_token

  @token = resolved_token

  # Request metrics, reported by #statistics.
  @request_count = 0
  @total_duration = 0.0

  # Collaborators: use the injected instances or build them from configuration.
  @logger = logger || create_logger
  @cache = cache || create_cache
end
|
33
|
+
|
34
|
+
# Execute a search against the Rospatent API
|
35
|
+
# @param params [Hash] Search parameters
|
36
|
+
# @return [Rospatent::SearchResult] Search result object
|
37
|
+
def search(**params)
  # Validate first so malformed input never reaches the API, then delegate to a
  # Search object bound to this client.
  checked_params = validate_search_params(params)
  searcher = Search.new(self)
  searcher.execute(**checked_params)
end
|
42
|
+
|
43
|
+
# Fetch a specific patent by its document ID using dedicated endpoint
|
44
|
+
# The document_id must follow one of these formats:
|
45
|
+
# - Published documents: {country code}{publication number}{document type code}_
|
46
|
+
# {publication date YYYYMMDD}
|
47
|
+
# Example: RU134694U1_20131120
|
48
|
+
# - Unpublished applications: {country code}{application number}{document type code}_
|
49
|
+
# {application date YYYYMMDD}
|
50
|
+
#
|
51
|
+
# @param document_id [String] The document ID to retrieve
|
52
|
+
# @return [Hash] The patent document data
|
53
|
+
# @raise [Rospatent::Errors::ApiError] If the document is not found or other API error
|
54
|
+
# @raise [Rospatent::Errors::InvalidRequestError] If document_id format is invalid
|
55
|
+
def patent(document_id)
  doc_id = validate_patent_id(document_id)
  key = "patent:#{doc_id}"

  # A falsy cache lookup is treated as a miss.
  cached = @cache.get(key)
  unless cached
    @logger.log_cache("miss", key)

    # Fetch from the dedicated docs endpoint and keep the document for one hour.
    fetched = get("/patsearch/v0.2/docs/#{doc_id}")
    @cache.set(key, fetched, ttl: 3600)
    @logger.log_cache("set", key, ttl: 3600)
    return fetched
  end

  @logger.log_cache("hit", key)
  cached
end
|
78
|
+
|
79
|
+
# Retrieve document by document components
|
80
|
+
# @param country_code [String] Country code (e.g., "RU")
|
81
|
+
# @param number [String] Patent number
|
82
|
+
# @param doc_type [String] Document type (e.g., "A1")
|
83
|
+
# @param date [String, Date] Publication date
|
84
|
+
# @return [Hash] Document data
|
85
|
+
# @raise [Rospatent::Errors::InvalidRequestError] If any required parameter is missing
|
86
|
+
def patent_by_components(country_code, number, doc_type, date)
  # Validate each component in argument order; the first failure raises.
  country = validate_string(country_code, "country_code", max_length: 2)
  doc_number = validate_string(number, "number")
  kind = validate_string(doc_type, "doc_type", max_length: 3)
  published_on = validate_date(date, "date")

  # Assemble the canonical ID: {country}{number}{type}_{YYYYMMDD}.
  compact_date = published_on.strftime("%Y%m%d")
  patent(format("%s%s%s_%s", country, doc_number, kind, compact_date))
end
|
98
|
+
|
99
|
+
# Find patents similar to a given document ID
|
100
|
+
# @param document_id [String] The document ID to find similar patents to
|
101
|
+
# @param count [Integer] Maximum number of results to return (default: 100)
|
102
|
+
# @return [Hash] The similar search results
|
103
|
+
# @raise [Rospatent::Errors::InvalidRequestError] If document_id is not provided
|
104
|
+
# @raise [Rospatent::Errors::ApiError] If the API request fails
|
105
|
+
#
|
106
|
+
# This method uses the Rospatent API's similar search endpoint to find patents
|
107
|
+
# similar to the given document ID.
|
108
|
+
# The document ID should be in the format 'XX12345Y1_YYYYMMDD', where 'XX' is
|
109
|
+
# the country code, '12345' is the publication number,
|
110
|
+
# 'Y1' is the document type, and 'YYYYMMDD' is the publication date.
|
111
|
+
#
|
112
|
+
# The method returns a hash containing the similar search results, which includes
|
113
|
+
# the patent IDs, titles, and other relevant information.
|
114
|
+
#
|
115
|
+
# If the document ID is not provided, the method raises an InvalidRequestError.
|
116
|
+
# If the API request fails, the method raises an ApiError.
|
117
|
+
def similar_patents_by_id(document_id, count: 100)
  pat_id = validate_patent_id(document_id)
  limit = validate_positive_integer(count, "count", max_value: 1000)
  key = "similar:id:#{pat_id}:#{limit}"

  cached = @cache.get(key)
  if cached
    @logger.log_cache("hit", key)
    cached
  else
    @logger.log_cache("miss", key)

    # Payload shape expected by the similar-search endpoint for ID lookups.
    request_body = { type_search: "id_search", pat_id: pat_id, count: limit }
    found = post_with_redirects("/patsearch/v0.2/similar_search", request_body)

    # Similar-search results are kept for 30 minutes.
    @cache.set(key, found, ttl: 1800)
    @logger.log_cache("set", key, ttl: 1800)
    found
  end
end
|
148
|
+
|
149
|
+
# Find patents similar to a given text
|
150
|
+
# @param text [String] The text to find similar patents to
|
151
|
+
# @param count [Integer] Maximum number of results to return (default: 100)
|
152
|
+
# @return [Hash] The similar search results
|
153
|
+
# @raise [Rospatent::Errors::InvalidRequestError] If text is not provided
|
154
|
+
def similar_patents_by_text(text, count: 100)
  # Validate inputs
  validated_text = validate_string(text, "search_text", max_length: 10_000)
  validated_count = validate_positive_integer(count, "count", max_value: 1000)

  # Key the cache on a SHA-256 digest of the text rather than String#hash:
  # String#hash is randomized per Ruby process and is not collision-resistant,
  # so keys would not be stable for a cache shared between processes and two
  # different texts could end up sharing one cached result.
  text_digest = Digest::SHA256.hexdigest(validated_text)
  cache_key = "similar:text:#{text_digest}:#{validated_count}"

  cached_result = @cache.get(cache_key)
  if cached_result
    @logger.log_cache("hit", cache_key)
    return cached_result
  end

  @logger.log_cache("miss", cache_key)

  # Build the payload according to API spec
  payload = {
    type_search: "text_search",
    pat_text: validated_text,
    count: validated_count
  }

  # Make the API request with redirect handling
  result = post_with_redirects("/patsearch/v0.2/similar_search", payload)

  # Cache the result for 30 minutes
  @cache.set(cache_key, result, ttl: 1800)
  @logger.log_cache("set", cache_key, ttl: 1800)

  result
end
|
186
|
+
|
187
|
+
# Get the list of available search datasets (collections)
|
188
|
+
# @return [Array<Hash>] List of available datasets organized in a tree structure
|
189
|
+
def datasets_tree
  key = "datasets:tree"

  cached = @cache.get(key)
  unless cached
    @logger.log_cache("miss", key)

    # The dataset list is fetched once and then kept for one hour.
    tree = get("/patsearch/v0.2/datasets/tree")
    @cache.set(key, tree, ttl: 3600)
    @logger.log_cache("set", key, ttl: 3600)
    return tree
  end

  @logger.log_cache("hit", key)
  cached
end
|
209
|
+
|
210
|
+
# Retrieve media data (PDF, images, 3D objects) for a patent document
|
211
|
+
# @param collection_id [String] Dataset/collection identifier (e.g., "National")
|
212
|
+
# @param country_code [String] Country code of publication (e.g., "RU")
|
213
|
+
# @param doc_type [String] Document type code (e.g., "U1")
|
214
|
+
# @param pub_date [String, Date] Publication date in format YYYY/MM/DD
|
215
|
+
# @param pub_number [String] Publication number
|
216
|
+
# @param filename [String] Media file name (e.g., "document.pdf")
|
217
|
+
# @return [String] Binary content of the requested file
|
218
|
+
# @raise [Rospatent::Errors::InvalidRequestError] If any required parameter is missing
|
219
|
+
def patent_media(collection_id, country_code, doc_type, pub_date, pub_number,
|
220
|
+
filename)
|
221
|
+
# Validate and normalize inputs
|
222
|
+
validated_collection = validate_required_string(collection_id, "collection_id")
|
223
|
+
validated_country = validate_required_string(country_code, "country_code", max_length: 2)
|
224
|
+
validated_doc_type = validate_required_string(doc_type, "doc_type", max_length: 3)
|
225
|
+
validated_date = validate_required_date(pub_date, "pub_date")
|
226
|
+
validated_number = validate_required_string(pub_number, "pub_number")
|
227
|
+
validated_filename = validate_required_string(filename, "filename")
|
228
|
+
|
229
|
+
# Format publication date
|
230
|
+
formatted_date = validated_date.strftime("%Y/%m/%d")
|
231
|
+
|
232
|
+
# Construct the path
|
233
|
+
path = "/media/#{validated_collection}/#{validated_country}/" \
|
234
|
+
"#{validated_doc_type}/#{formatted_date}/#{validated_number}/" \
|
235
|
+
"#{validated_filename}"
|
236
|
+
|
237
|
+
# Make a GET request to retrieve the media file
|
238
|
+
get(path)
|
239
|
+
end
|
240
|
+
|
241
|
+
# Simplified method to retrieve media data by patent ID and collection ID
|
242
|
+
# @param document_id [String] The patent document ID (e.g., "RU134694U1_20131120")
|
243
|
+
# @param collection_id [String] Dataset/collection identifier (e.g., "National")
|
244
|
+
# @param filename [String] Media file name (e.g., "document.pdf")
|
245
|
+
# @return [String] Binary content of the requested file
|
246
|
+
# @raise [Rospatent::Errors::InvalidRequestError] If document_id format is invalid
|
247
|
+
# or parameters are missing
|
248
|
+
def patent_media_by_id(document_id, collection_id, filename)
  doc_id = validate_patent_id(document_id)
  collection = validate_required_string(collection_id, "collection_id")
  file = validate_required_string(filename, "filename")

  # Break the ID into the pieces the media endpoint path is built from.
  parts = parse_patent_id(doc_id)

  # Rewrite the compact YYYYMMDD date as YYYY/MM/DD.
  slashed_date = parts[:date].gsub(/^(\d{4})(\d{2})(\d{2})$/, '\1/\2/\3')

  country, kind, number = parts.values_at(:country_code, :doc_type, :number)
  patent_media(collection, country, kind, slashed_date, number, file)
end
|
264
|
+
|
265
|
+
# Extract and parse the abstract content from a patent document
|
266
|
+
# Delegates to PatentParser.parse_abstract
|
267
|
+
# @param patent_data [Hash] The patent document data returned by #patent method
|
268
|
+
# @param format [Symbol] The desired output format (:text or :html)
|
269
|
+
# @param language [String] The language code (e.g., "ru", "en")
|
270
|
+
# @return [String, nil] The parsed abstract content in the requested format or nil if not found
|
271
|
+
# @example Get plain text abstract
|
272
|
+
# abstract = client.parse_abstract(patent_doc)
|
273
|
+
# @example Get HTML abstract in English
|
274
|
+
# abstract_html = client.parse_abstract(patent_doc, format: :html, language: "en")
|
275
|
+
def parse_abstract(patent_data, format: :text, language: "ru")
  # Only plain text and HTML are supported for abstracts.
  validate_enum(format, [:text, :html], "format")
  # A nil/false language skips validation and is passed through as given.
  language && validate_string(language, "language", max_length: 5)

  PatentParser.parse_abstract(patent_data, format: format, language: language)
end
|
282
|
+
|
283
|
+
# Extract and parse the description content from a patent document
|
284
|
+
# Delegates to PatentParser.parse_description
|
285
|
+
# @param patent_data [Hash] The patent document data returned by #patent method
|
286
|
+
# @param format [Symbol] The desired output format (:text, :html, or :sections)
|
287
|
+
# @param language [String] The language code (e.g., "ru", "en")
|
288
|
+
# @return [String, Array, nil] The parsed description content in the requested
|
289
|
+
# format or nil if not found
|
290
|
+
# @example Get plain text description
|
291
|
+
# description = client.parse_description(patent_doc)
|
292
|
+
# @example Get HTML description
|
293
|
+
# description_html = client.parse_description(patent_doc, format: :html)
|
294
|
+
# @example Get description split into sections
|
295
|
+
# sections = client.parse_description(patent_doc, format: :sections)
|
296
|
+
def parse_description(patent_data, format: :text, language: "ru")
  # Descriptions additionally support the :sections format.
  validate_enum(format, [:text, :html, :sections], "format")
  # A nil/false language skips validation and is passed through as given.
  language && validate_string(language, "language", max_length: 5)

  PatentParser.parse_description(patent_data, format: format, language: language)
end
|
303
|
+
|
304
|
+
# Search within a classification system (IPC or CPC) using natural language
|
305
|
+
# @param classifier_id [String] Classification system identifier ("ipc" or "cpc")
|
306
|
+
# @param query [String] Search query in natural language
|
307
|
+
# @param lang [String] Language for the search ("ru" or "en")
|
308
|
+
# @return [Hash] Search results containing classification codes and descriptions
|
309
|
+
# @raise [Rospatent::Errors::ValidationError] If parameters are invalid
|
310
|
+
# @example Search for rocket-related IPC codes
|
311
|
+
# results = client.classification_search("ipc", query: "ракета", lang: "ru")
|
312
|
+
def classification_search(classifier_id, query:, lang: "ru")
  classifier = validate_enum(classifier_id, %w[ipc cpc], "classifier_id").to_s
  search_query = validate_string(query, "query", max_length: 1000)
  language = validate_enum(lang, %w[ru en], "lang").to_s

  key = "classification:search:#{classifier}:#{search_query}:#{language}"

  cached = @cache.get(key)
  if cached
    @logger.log_cache("hit", key)
    cached
  else
    @logger.log_cache("miss", key)

    # The classifier goes into the URL; query and language go into the JSON body.
    request_body = { query: search_query, lang: language }
    found = post("/patsearch/v0.2/classification/#{classifier}/search", request_body)

    # Search results are kept for 30 minutes.
    @cache.set(key, found, ttl: 1800)
    @logger.log_cache("set", key, ttl: 1800)
    found
  end
end
|
344
|
+
|
345
|
+
# Get detailed information about a specific classification code
|
346
|
+
# @param classifier_id [String] Classification system identifier ("ipc" or "cpc")
|
347
|
+
# @param code [String] Classification code to look up
|
348
|
+
# @param lang [String] Language for the description ("ru" or "en")
|
349
|
+
# @return [Hash] Detailed information about the classification code
|
350
|
+
# @raise [Rospatent::Errors::ValidationError] If parameters are invalid
|
351
|
+
# @example Get information about IPC code
|
352
|
+
# info = client.classification_code("ipc", code: "F02K9/00", lang: "ru")
|
353
|
+
def classification_code(classifier_id, code:, lang: "ru")
  classifier = validate_enum(classifier_id, %w[ipc cpc], "classifier_id").to_s
  lookup_code = validate_string(code, "code", max_length: 50)
  language = validate_enum(lang, %w[ru en], "lang").to_s

  key = "classification:code:#{classifier}:#{lookup_code}:#{language}"

  cached = @cache.get(key)
  unless cached
    @logger.log_cache("miss", key)

    # The classifier goes into the URL; code and language go into the JSON body.
    request_body = { code: lookup_code, lang: language }
    details = post("/patsearch/v0.2/classification/#{classifier}/code", request_body)

    # Code details are kept for one hour.
    @cache.set(key, details, ttl: 3600)
    @logger.log_cache("set", key, ttl: 3600)
    return details
  end

  @logger.log_cache("hit", key)
  cached
end
|
384
|
+
|
385
|
+
# Execute a GET request to the API
|
386
|
+
# @param endpoint [String] API endpoint
|
387
|
+
# @param params [Hash] Query parameters (optional)
|
388
|
+
# @return [Hash] Response data
|
389
|
+
def get(endpoint, params = {})
  # Time the request on the monotonic clock: wall-clock time (Time.now) can be
  # adjusted while the request is in flight (e.g. by NTP), which would produce
  # negative or inflated durations in the logs and in #statistics.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  request_id = generate_request_id

  @logger.log_request("GET", endpoint, params, connection.headers)
  @request_count += 1

  response = connection.get(endpoint, params) do |req|
    req.headers["Accept"] = "application/json"
    req.headers["Content-Type"] = "application/json"
    req.headers["X-Request-ID"] = request_id
  end

  duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
  @total_duration += duration

  @logger.log_response("GET", endpoint, response.status, duration,
                       response_size: response.body&.bytesize, request_id: request_id)

  # Parses the JSON body on success; raises a mapped Errors::* otherwise.
  handle_response(response, request_id)
rescue Faraday::Error => e
  @logger.log_error(e, { endpoint: endpoint, params: params, request_id: request_id })
  handle_error(e)
end
|
413
|
+
|
414
|
+
# Execute a POST request to the API
|
415
|
+
# @param endpoint [String] API endpoint
|
416
|
+
# @param payload [Hash] Request payload
|
417
|
+
# @return [Hash] Response data
|
418
|
+
def post(endpoint, payload)
  # Time the request on the monotonic clock: wall-clock time (Time.now) can be
  # adjusted while the request is in flight (e.g. by NTP), which would produce
  # negative or inflated durations in the logs and in #statistics.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  request_id = generate_request_id

  @logger.log_request("POST", endpoint, payload, connection.headers)
  @request_count += 1

  response = connection.post(endpoint) do |req|
    req.headers["Accept"] = "application/json"
    req.headers["Content-Type"] = "application/json"
    req.headers["X-Request-ID"] = request_id
    req.body = payload.to_json
  end

  duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
  @total_duration += duration

  @logger.log_response("POST", endpoint, response.status, duration,
                       response_size: response.body&.bytesize, request_id: request_id)

  # Parses the JSON body on success; raises a mapped Errors::* otherwise.
  handle_response(response, request_id)
rescue Faraday::Error => e
  @logger.log_error(e, { endpoint: endpoint, payload: payload, request_id: request_id })
  handle_error(e)
end
|
443
|
+
|
444
|
+
# Batch process multiple patents
|
445
|
+
# @param document_ids [Array<String>] Array of document IDs
|
446
|
+
# @param batch_size [Integer] Number of patents to process concurrently
|
447
|
+
# @return [Enumerator] Enumerator that yields patent documents
|
448
|
+
def batch_patents(document_ids, batch_size: 10)
  # Without a block, hand back an Enumerator that re-enters this method lazily.
  return enum_for(:batch_patents, document_ids, batch_size: batch_size) unless block_given?

  validate_array(document_ids, "document_ids", max_size: 1000)
  slice_size = validate_positive_integer(batch_size, "batch_size", max_value: 50)

  document_ids.each_slice(slice_size) do |slice|
    # One thread per document in the slice. A failure is logged and replaced by
    # an error hash so a single bad ID does not abort the rest of the batch.
    workers = slice.map do |id|
      Thread.new(id) do |doc_id|
        begin
          patent(doc_id)
        rescue StandardError => e
          @logger.log_error(e, { document_id: doc_id, operation: "batch_patents" })
          { error: e.message, document_id: doc_id }
        end
      end
    end

    # Thread#value joins the thread, so results are yielded in input order.
    workers.each { |worker| yield worker.value }
  end
end
|
467
|
+
|
468
|
+
# Get client statistics
|
469
|
+
# @return [Hash] Client usage statistics
|
470
|
+
def statistics
  # Avoid dividing by zero before the first request has been made.
  average = @request_count.positive? ? (@total_duration / @request_count).round(3) : 0

  {
    requests_made: @request_count,
    total_duration_seconds: @total_duration.round(3),
    average_request_time: average,
    cache_stats: @cache.statistics
  }
end
|
482
|
+
|
483
|
+
private
|
484
|
+
|
485
|
+
# Validate search parameters
|
486
|
+
# @param params [Hash] Search parameters to validate
|
487
|
+
# @return [Hash] Validated parameters
|
488
|
+
def validate_search_params(params)
  # Small builders for the rule shapes that repeat; each call returns a fresh hash.
  text_rule = ->(max) { { type: :string, max_length: max } }
  int_rule = ->(min, max) { { type: :positive_integer, min_value: min, max_value: max } }
  enum_rule = ->(values) { { type: :enum, allowed_values: values } }
  flag_rule = -> { { type: :boolean } }

  # Key order is kept as-is so rules are applied in the same sequence.
  rules = {
    q: text_rule.call(1000),
    qn: text_rule.call(1000),
    limit: int_rule.call(1, 100),
    offset: int_rule.call(0, 10_000),
    pre_tag: text_rule.call(50),
    post_tag: text_rule.call(50),
    sort: enum_rule.call(%i[relevance pub_date filing_date]),
    group_by: enum_rule.call([:patent_family]),
    include_facets: flag_rule.call,
    highlight: flag_rule.call,
    datasets: { type: :array, max_size: 10 }
  }

  validate_params(params, rules)
end
|
505
|
+
|
506
|
+
# Parse a patent ID string into its component parts
|
507
|
+
# @param document_id [String] The document ID to parse
|
508
|
+
# @return [Hash] The component parts of the document ID
|
509
|
+
# @example Parse "RU134694U1_20131120"
|
510
|
+
# parse_patent_id("RU134694U1_20131120")
|
511
|
+
# # => { country_code: "RU", number: "134694", doc_type: "U1", date: "20131120" }
|
512
|
+
def parse_patent_id(document_id)
  # Document type: a capital letter followed by digits at the end of the main part.
  doc_type_pattern = /([A-Z]\d+)$/

  # "RU134694U1_20131120" => main part "RU134694U1", date "20131120".
  main_part, date = document_id.split("_")

  # The country code is always the first two characters.
  country_code = main_part[0..1]

  type_start = main_part.index(doc_type_pattern)
  if type_start
    doc_type = main_part[doc_type_pattern]
    # The number sits between the country code and the document type.
    number = main_part[2..(type_start - 1)]
  else
    # No recognizable document type: everything after the country code is the number.
    doc_type = nil
    number = main_part[2..-1]
  end

  { country_code: country_code, number: number, doc_type: doc_type, date: date }
end
|
535
|
+
|
536
|
+
# Create a Faraday connection with appropriate configuration
|
537
|
+
# @return [Faraday::Connection] Configured connection
|
538
|
+
def connection
  # Built once per client and reused for every request.
  return @connection if @connection

  settings = Rospatent.configuration
  retry_options = {
    max: settings.retry_count,
    interval: 0.5,
    interval_randomness: 0.5,
    backoff_factor: 2
  }

  @connection = Faraday.new(url: settings.api_url) do |faraday|
    faraday.headers["Authorization"] = "Bearer #{@token}"
    faraday.headers["User-Agent"] = settings.user_agent

    # The same configured timeout is used for connecting and for the whole request.
    faraday.options.timeout = settings.timeout
    faraday.options.open_timeout = settings.timeout

    faraday.request :retry, retry_options
    faraday.adapter Faraday.default_adapter
  end
end
|
556
|
+
|
557
|
+
# Create a Faraday connection with redirect following for specific endpoints
|
558
|
+
# @return [Faraday::Connection] Configured connection with redirect support
|
559
|
+
def connection_with_redirects
  # Built once per client and reused for every redirect-following request.
  return @connection_with_redirects if @connection_with_redirects

  settings = Rospatent.configuration
  retry_options = {
    max: settings.retry_count,
    interval: 0.5,
    interval_randomness: 0.5,
    backoff_factor: 2
  }

  @connection_with_redirects = Faraday.new(url: settings.api_url) do |faraday|
    faraday.headers["Authorization"] = "Bearer #{@token}"
    faraday.headers["User-Agent"] = settings.user_agent

    # The same configured timeout is used for connecting and for the whole request.
    faraday.options.timeout = settings.timeout
    faraday.options.open_timeout = settings.timeout

    faraday.request :retry, retry_options

    # The only difference from #connection: redirects are followed.
    faraday.response :follow_redirects
    faraday.adapter Faraday.default_adapter
  end
end
|
578
|
+
|
579
|
+
# Make an HTTP POST request with redirect support
|
580
|
+
# @param endpoint [String] API endpoint
|
581
|
+
# @param payload [Hash] Request payload
|
582
|
+
# @return [Hash] Parsed response data
|
583
|
+
def post_with_redirects(endpoint, payload = {})
  # Time the request on the monotonic clock: wall-clock time (Time.now) can be
  # adjusted while the request is in flight (e.g. by NTP), which would produce
  # negative or inflated durations in the logs and in #statistics.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  request_id = generate_request_id

  @logger.log_request("POST", endpoint, payload, connection_with_redirects.headers)
  @request_count += 1

  response = connection_with_redirects.post(endpoint) do |req|
    req.headers["Accept"] = "application/json"
    req.headers["Content-Type"] = "application/json"
    req.headers["X-Request-ID"] = request_id
    req.body = payload.to_json
  end

  duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
  @total_duration += duration

  @logger.log_response("POST", endpoint, response.status, duration,
                       response_size: response.body&.bytesize, request_id: request_id)

  # Parses the JSON body on success; raises a mapped Errors::* otherwise.
  handle_response(response, request_id)
rescue Faraday::Error => e
  @logger.log_error(e, { endpoint: endpoint, payload: payload, request_id: request_id })
  handle_error(e)
end
|
608
|
+
|
609
|
+
# Process API response
|
610
|
+
# @param response [Faraday::Response] Raw response from the API
|
611
|
+
# @param request_id [String] Request ID for tracking
|
612
|
+
# @return [Hash] Parsed response data
|
613
|
+
# @raise [Rospatent::Errors::ApiError] If the response is not successful
|
614
|
+
def handle_response(response, request_id = nil)
  return JSON.parse(response.body) if response.success?

  # Error bodies are not guaranteed to be JSON objects. Tolerate plain text,
  # nil/empty bodies and non-Hash JSON (arrays, null) here; otherwise a
  # TypeError/NoMethodError would be raised instead of the real API error.
  raw_body = response.body.to_s
  error_msg = begin
    data = JSON.parse(raw_body)
    (data.is_a?(Hash) && (data["error"] || data["message"])) || "Unknown error"
  rescue JSON::ParserError
    raw_body
  end

  # Create specific error types based on status code
  case response.status
  when 401
    raise Errors::AuthenticationError, "#{error_msg} [Request ID: #{request_id}]"
  when 404
    raise Errors::NotFoundError.new("#{error_msg} [Request ID: #{request_id}]", response.status)
  when 422
    errors = extract_validation_errors(response)
    raise Errors::ValidationError.new(error_msg, errors)
  when 429
    # A missing Retry-After header leaves retry_after as nil.
    retry_after = response.headers["Retry-After"]&.to_i
    raise Errors::RateLimitError.new(error_msg, response.status, retry_after)
  when 503
    raise Errors::ServiceUnavailableError.new("#{error_msg} [Request ID: #{request_id}]",
                                              response.status)
  else
    raise Errors::ApiError.new(error_msg, response.status, response.body, request_id)
  end
end
|
643
|
+
|
644
|
+
# Handle connection errors
|
645
|
+
# @param error [Faraday::Error] Connection error
|
646
|
+
# @raise [Rospatent::Errors::ConnectionError] Wrapped connection error
|
647
|
+
def handle_error(error)
  # Pick the wrapper class and message prefix; only timeouts get their own
  # error type, everything else becomes a ConnectionError.
  wrapper, prefix =
    case error
    when Faraday::TimeoutError then [Errors::TimeoutError, "Request timed out"]
    when Faraday::ConnectionFailed then [Errors::ConnectionError, "Connection failed"]
    else [Errors::ConnectionError, "Connection error"]
    end

  # The original Faraday error is passed along as the second argument.
  raise wrapper.new("#{prefix}: #{error.message}", error)
end
|
657
|
+
|
658
|
+
# Extract validation errors from API response
|
659
|
+
# @param response [Faraday::Response] API response
|
660
|
+
# @return [Hash] Field-specific validation errors
|
661
|
+
def extract_validation_errors(response)
  # to_s guards against a nil body, which JSON.parse rejects with TypeError.
  data = JSON.parse(response.body.to_s)

  # Only a JSON object can carry field errors. Arrays, scalars and null would
  # otherwise raise TypeError/NoMethodError on the key lookups below.
  return {} unless data.is_a?(Hash)

  data["errors"] || data["validation_errors"] || {}
rescue JSON::ParserError
  {}
end
|
667
|
+
|
668
|
+
# Create logger instance based on configuration
|
669
|
+
# @return [Rospatent::Logger, Rospatent::NullLogger] Logger instance
|
670
|
+
def create_logger
  settings = Rospatent.configuration
  level = settings.log_level

  # A :none level disables logging through a no-op logger.
  return NullLogger.new if level == :none

  # JSON output in production, plain text everywhere else.
  formatter = settings.environment == "production" ? :json : :text
  Logger.new(level: level, formatter: formatter)
end
|
679
|
+
|
680
|
+
# Create cache instance based on configuration
|
681
|
+
# @return [Rospatent::Cache, Rospatent::NullCache] Cache instance
|
682
|
+
def create_cache
  settings = Rospatent.configuration

  if settings.cache_enabled
    Cache.new(ttl: settings.cache_ttl, max_size: settings.cache_max_size)
  else
    # Caching switched off: use the no-op cache so callers need no special casing.
    NullCache.new
  end
end
|
691
|
+
|
692
|
+
# Generate a unique request ID
|
693
|
+
# @return [String] Unique request identifier
|
694
|
+
def generate_request_id
  # The timestamp keeps IDs roughly time-ordered for log reading. Uniqueness
  # comes from 64 random bits: a rand(10_000) suffix can easily repeat when
  # several requests start at the same instant, as #batch_patents threads do.
  "req_#{Time.now.to_f}_#{SecureRandom.hex(8)}"
end
|
697
|
+
end
|
698
|
+
end
|