ox-tender-abstract 0.9.2 → 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c1e519805302809f117b67e75099300485fc5910038e6ab6012eb9a4343f8e0b
4
- data.tar.gz: ffccc77f4305ca644cbe5128a2f1178e8ff30170b25180686fc3db12d5a19761
3
+ metadata.gz: 9924cb49a0b35703d8e0093b3ae40b8b8556f7da398990b3ff5f84f185c7d944
4
+ data.tar.gz: 2598c2bb4af463aa3cbf1bde9c1cc76aac352f8e58a193152305dccc4a2cef8a
5
5
  SHA512:
6
- metadata.gz: d062a1e8139143c3c86490c54b9e367f9e6053d27e2a981f8202b2496d187d56f11e2b1b2f101c1c3b9ae69045e5388f34365f9d88e77f480177603c8fd7a1f8
7
- data.tar.gz: 57710ad7e471c7165d2878b7ffcbac5a2fbd6ffba10bc9c6c2a47af4015ca639df1d53f1cf89956858838dce7d18efdfca811a4d5f5bef2d53fb46c6a4fa14b7
6
+ metadata.gz: 3c0a783ab40ca1f45be2d0d5c32db63345a40502bdf6140b446018686b73482afba84ce3097d7ab7afe821b106915e06ba32b03b66a35a19adf95fdc4ad2c404
7
+ data.tar.gz: 53a15a50dad376969b8ff1869e88197a669288bdd57b3931ce59105c8288b25f6f9a9768e03b9211f8284570e4bbf219daa98f2702270c39c967c5e1737c4ead
data/CHANGELOG.md CHANGED
@@ -1,3 +1,14 @@
1
+ ## [0.9.3] - 2025-07-27
2
+
3
+ - Added support for parsing tender documents
4
+ - Added support for parsing contract documents
5
+ - Added support for parsing organization documents
6
+ - Added support for parsing generic documents
7
+ - Added support for parsing attachments
8
+ - Added support for parsing tender documents
9
+ - Added support for parsing contract documents
10
+ - Added support for parsing organization documents
11
+
1
12
  ## [0.9.0] - 2025-07-15
2
13
 
3
14
  - Initial release
data/README.md CHANGED
@@ -273,6 +273,41 @@ puts result.data[:total_archives] # => 6
273
273
  # Processing typically takes 10-15 seconds for a full day's data
274
274
  ```
275
275
 
276
+ ## Error Handling
277
+
278
+ The library uses the `Result` pattern for error handling:
279
+
280
+ ```ruby
281
+ result = OxTenderAbstract.search_tenders(org_region: '77', exact_date: '2024-01-01')
282
+
283
+ if result.success?
284
+ puts "Found tenders: #{result.data[:tenders].size}"
285
+ else
286
+ puts "Error: #{result.error}"
287
+
288
+ # Check error type for special handling
289
+ if result.metadata[:error_type] == :blocked
290
+ retry_after = result.metadata[:retry_after] || 600
291
+ puts "API blocked for #{retry_after} seconds"
292
+ end
293
+ end
294
+ ```
295
+
296
+ ### Handling API Blocks
297
+
298
+ When making frequent requests, the API may block archive downloads for 10 minutes. The library automatically detects such blocks:
299
+
300
+ ```ruby
301
+ result = OxTenderAbstract.search_tenders(org_region: '77', exact_date: '2024-01-01')
302
+
303
+ if result.failure? && result.metadata[:error_type] == :blocked
304
+ retry_after = result.metadata[:retry_after] # 600 seconds (10 minutes)
305
+ puts "Download blocked, retry in #{retry_after} seconds"
306
+ end
307
+ ```
308
+
309
+ For detailed guidance on using with Sidekiq background jobs, see [SIDEKIQ_USAGE.md](SIDEKIQ_USAGE.md).
310
+
276
311
  ## Requirements
277
312
 
278
313
  - Ruby >= 3.0.0
@@ -1,39 +1,172 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative 'oxtenderabstract/version'
4
- require_relative 'oxtenderabstract/logger'
4
+ require_relative 'oxtenderabstract/configuration'
5
5
  require_relative 'oxtenderabstract/errors'
6
+ require_relative 'oxtenderabstract/logger'
6
7
  require_relative 'oxtenderabstract/result'
7
8
  require_relative 'oxtenderabstract/document_types'
8
- require_relative 'oxtenderabstract/configuration'
9
- require_relative 'oxtenderabstract/xml_parser'
10
9
  require_relative 'oxtenderabstract/archive_processor'
10
+ require_relative 'oxtenderabstract/xml_parser'
11
11
  require_relative 'oxtenderabstract/client'
12
12
 
13
13
  # Main module for OxTenderAbstract library
14
14
  module OxTenderAbstract
15
15
  class Error < StandardError; end
16
16
 
17
- # Convenience method to create a new client
18
- def self.client(token: nil)
19
- Client.new(token: token)
20
- end
17
+ class << self
18
+ def configure
19
+ yield(configuration)
20
+ end
21
21
 
22
- # Search tenders by region and date (convenience method)
23
- def self.search_tenders(org_region:, exact_date:, token: nil, **options)
24
- client = Client.new(token: token)
25
- client.search_tenders(org_region: org_region, exact_date: exact_date, **options)
26
- end
22
+ def configuration
23
+ @configuration ||= Configuration.new
24
+ end
27
25
 
28
- # Enhanced search tenders with detailed information (convenience method)
29
- def self.enhanced_search_tenders(org_region:, exact_date:, token: nil, **options)
30
- client = Client.new(token: token)
31
- client.enhanced_search_tenders(org_region: org_region, exact_date: exact_date, **options)
32
- end
26
+ def reset_configuration!
27
+ @configuration = nil
28
+ end
29
+
30
+ # Convenience method for searching tenders in specific subsystem
31
+ def search_tenders(org_region:, exact_date:, subsystem_type: DocumentTypes::DEFAULT_SUBSYSTEM,
32
+ document_type: DocumentTypes::DEFAULT_DOCUMENT_TYPE)
33
+ client = Client.new
34
+ client.search_tenders(
35
+ org_region: org_region,
36
+ exact_date: exact_date,
37
+ subsystem_type: subsystem_type,
38
+ document_type: document_type
39
+ )
40
+ end
41
+
42
+ # Enhanced method for searching tenders across multiple subsystems
43
+ def search_all_tenders(org_region:, exact_date:, subsystems: nil, document_types: nil)
44
+ # Default subsystems to search
45
+ subsystems ||= %w[PRIZ RPEC RPGZ BTK UR RGK OD223 RD223]
46
+
47
+ client = Client.new
48
+ all_results = {}
49
+ total_tenders = []
50
+ total_archives = 0
51
+
52
+ subsystems.each do |subsystem_type|
53
+ # Get appropriate document types for this subsystem
54
+ available_types = DocumentTypes.document_types_for_subsystem(subsystem_type)
55
+ test_types = document_types || [available_types.first] # Test first type by default
56
+
57
+ subsystem_results = {
58
+ subsystem: subsystem_type,
59
+ description: DocumentTypes.description_for_subsystem(subsystem_type),
60
+ tenders: [],
61
+ archives: 0,
62
+ errors: []
63
+ }
64
+
65
+ test_types.each do |doc_type|
66
+ result = client.search_tenders(
67
+ org_region: org_region,
68
+ exact_date: exact_date,
69
+ subsystem_type: subsystem_type,
70
+ document_type: doc_type
71
+ )
72
+
73
+ if result.success?
74
+ tenders = result.data[:tenders] || []
75
+ archives = result.data[:total_archives] || 0
76
+
77
+ subsystem_results[:tenders].concat(tenders)
78
+ subsystem_results[:archives] += archives
79
+ total_archives += archives
80
+
81
+ # Add subsystem info to each tender
82
+ tenders.each do |tender|
83
+ tender[:subsystem_type] = subsystem_type
84
+ tender[:subsystem_description] = DocumentTypes.description_for_subsystem(subsystem_type)
85
+ tender[:document_type_used] = doc_type
86
+ end
87
+
88
+ total_tenders.concat(tenders)
89
+ else
90
+ subsystem_results[:errors] << "#{doc_type}: #{result.error}"
91
+ end
92
+ rescue StandardError => e
93
+ subsystem_results[:errors] << "#{doc_type}: #{e.message}"
94
+ end
95
+
96
+ all_results[subsystem_type] = subsystem_results
97
+ end
98
+
99
+ Result.success({
100
+ tenders: total_tenders,
101
+ total_archives: total_archives,
102
+ subsystem_results: all_results,
103
+ search_params: {
104
+ org_region: org_region,
105
+ exact_date: exact_date,
106
+ subsystems_searched: subsystems.size
107
+ },
108
+ processed_at: Time.now
109
+ })
110
+ end
111
+
112
+ # Get documents by registry number across subsystems
113
+ def get_docs_by_reestr_number(reestr_number:, subsystem_type: DocumentTypes::DEFAULT_SUBSYSTEM)
114
+ client = Client.new
115
+ client.get_docs_by_reestr_number(
116
+ reestr_number: reestr_number,
117
+ subsystem_type: subsystem_type
118
+ )
119
+ end
120
+
121
+ # Enhanced search with detailed information extraction
122
+ def enhanced_search_tenders(org_region:, exact_date:, subsystem_type: DocumentTypes::DEFAULT_SUBSYSTEM,
123
+ document_type: DocumentTypes::DEFAULT_DOCUMENT_TYPE,
124
+ include_attachments: true)
125
+ client = Client.new
126
+ client.enhanced_search_tenders(
127
+ org_region: org_region,
128
+ exact_date: exact_date,
129
+ subsystem_type: subsystem_type,
130
+ document_type: document_type,
131
+ include_attachments: include_attachments
132
+ )
133
+ end
33
134
 
34
- # Get documents by registry number (convenience method)
35
- def self.get_docs_by_reestr_number(reestr_number:, token: nil, **options)
36
- client = Client.new(token: token)
37
- client.get_docs_by_reestr_number(reestr_number: reestr_number, **options)
135
+ # Search tenders with automatic wait on API blocks and resume capability
136
+ def search_tenders_with_auto_wait(org_region:, exact_date:, subsystem_type: DocumentTypes::DEFAULT_SUBSYSTEM,
137
+ document_type: DocumentTypes::DEFAULT_DOCUMENT_TYPE, resume_state: nil)
138
+ client = Client.new
139
+
140
+ # If a resume state was supplied, continue from where it left off
141
+ if resume_state
142
+ start_from = resume_state[:next_archive_index] || 0
143
+ client.search_tenders_with_resume(
144
+ org_region: org_region,
145
+ exact_date: exact_date,
146
+ subsystem_type: subsystem_type,
147
+ document_type: document_type,
148
+ start_from_archive: start_from,
149
+ resume_state: resume_state
150
+ )
151
+ else
152
+ # Use the regular search method when auto-wait is enabled
153
+ if configuration.auto_wait_on_block
154
+ client.search_tenders(
155
+ org_region: org_region,
156
+ exact_date: exact_date,
157
+ subsystem_type: subsystem_type,
158
+ document_type: document_type
159
+ )
160
+ else
161
+ # Otherwise use the resumable search method
162
+ client.search_tenders_with_resume(
163
+ org_region: org_region,
164
+ exact_date: exact_date,
165
+ subsystem_type: subsystem_type,
166
+ document_type: document_type
167
+ )
168
+ end
169
+ end
170
+ end
38
171
  end
39
172
  end
@@ -13,6 +13,8 @@ module OxTenderAbstract
13
13
  include ContextualLogger
14
14
 
15
15
  MAX_FILE_SIZE_BYTES = 100 * 1024 * 1024 # 100 MB in bytes
16
+ MAX_RETRY_ATTEMPTS = 3
17
+ RETRY_DELAY_SECONDS = 2
16
18
 
17
19
  def initialize
18
20
  # Archive processor initialization
@@ -23,8 +25,8 @@ module OxTenderAbstract
23
25
  return Result.failure('Empty archive URL') if archive_url.nil? || archive_url.empty?
24
26
 
25
27
  begin
26
- # Download archive to memory
27
- download_result = download_to_memory(archive_url)
28
+ # Download archive to memory with retry logic
29
+ download_result = download_with_retry(archive_url)
28
30
  return download_result if download_result.failure?
29
31
 
30
32
  content = download_result.data[:content]
@@ -56,15 +58,52 @@ module OxTenderAbstract
56
58
  file_count: zip_result.size
57
59
  })
58
60
  else
61
+ # Log first bytes for debugging
62
+ log_error "Unknown archive format. First 10 bytes: #{content[0..9].unpack1('H*')}"
59
63
  Result.failure('Unknown archive format (not GZIP and not ZIP)')
60
64
  end
61
65
  rescue StandardError => e
66
+ log_error "Archive processing error: #{e.message}"
67
+ log_error e.backtrace.first(3).join("\n") if e.backtrace
62
68
  Result.failure("Archive processing error: #{e.message}")
63
69
  end
64
70
  end
65
71
 
66
72
  private
67
73
 
74
+ def download_with_retry(archive_url)
75
+ attempt = 1
76
+ last_error = nil
77
+
78
+ while attempt <= MAX_RETRY_ATTEMPTS
79
+ begin
80
+ log_info "Download attempt #{attempt}/#{MAX_RETRY_ATTEMPTS} for archive"
81
+ result = download_to_memory(archive_url)
82
+
83
+ if result.success?
84
+ log_info "Download successful on attempt #{attempt}"
85
+ return result
86
+ else
87
+ last_error = result.error
88
+ log_warn "Download attempt #{attempt} failed: #{last_error}"
89
+ end
90
+ rescue StandardError => e
91
+ last_error = e.message
92
+ log_error "Download attempt #{attempt} exception: #{last_error}"
93
+ end
94
+
95
+ if attempt < MAX_RETRY_ATTEMPTS
96
+ sleep_time = RETRY_DELAY_SECONDS * attempt
97
+ log_info "Waiting #{sleep_time} seconds before retry..."
98
+ sleep(sleep_time)
99
+ end
100
+
101
+ attempt += 1
102
+ end
103
+
104
+ Result.failure("Download failed after #{MAX_RETRY_ATTEMPTS} attempts. Last error: #{last_error}")
105
+ end
106
+
68
107
  def download_to_memory(url)
69
108
  begin
70
109
  uri = URI.parse(url)
@@ -81,12 +120,42 @@ module OxTenderAbstract
81
120
  request['User-Agent'] = "OxTenderAbstract/#{OxTenderAbstract::VERSION}"
82
121
  request['individualPerson_token'] = OxTenderAbstract.configuration.token
83
122
 
84
- log_debug "Downloading archive from: #{url}"
123
+ log_debug "Downloading archive from: #{url[0..100]}..."
85
124
 
86
125
  response = http.request(request)
87
126
 
127
+ # Enhanced error handling with response details
88
128
  unless response.is_a?(Net::HTTPSuccess)
89
- return Result.failure("HTTP error: #{response.code} #{response.message}")
129
+ error_msg = "HTTP error: #{response.code} #{response.message}"
130
+ if response.body && !response.body.empty?
131
+ # Log first part of response body for debugging
132
+ body_preview = response.body[0..500]
133
+ log_error "Response body preview: #{body_preview}"
134
+ error_msg += ". Response: #{body_preview[0..100]}"
135
+ end
136
+ return Result.failure(error_msg)
137
+ end
138
+
139
+ # Check for download blocking message in successful response
140
+ if response.body&.include?('Скачивание архива по данной ссылке заблокировано')
141
+ if OxTenderAbstract.configuration.auto_wait_on_block
142
+ wait_time = OxTenderAbstract.configuration.block_wait_time
143
+ log_error "Archive download blocked. Auto-waiting for #{wait_time} seconds..."
144
+
145
+ # Show wait progress while the block is in effect
146
+ show_wait_progress(wait_time)
147
+
148
+ log_info 'Wait completed, retrying download...'
149
+ # Retry recursively after waiting (NOTE(review): no visible limit if the block message keeps coming back)
150
+ return download_to_memory(url)
151
+ else
152
+ log_error 'Archive download blocked for 10 minutes'
153
+ return Result.failure(
154
+ 'Archive download blocked for 10 minutes',
155
+ error_type: :blocked,
156
+ retry_after: 600
157
+ )
158
+ end
90
159
  end
91
160
 
92
161
  content = response.body
@@ -96,7 +165,9 @@ module OxTenderAbstract
96
165
  return Result.failure("Archive too large: #{size} bytes (max: #{MAX_FILE_SIZE_BYTES})")
97
166
  end
98
167
 
99
- log_debug "Downloaded archive: #{size} bytes"
168
+ return Result.failure('Empty archive downloaded') if size == 0
169
+
170
+ log_debug "Downloaded archive: #{size} bytes, content-type: #{response['content-type']}"
100
171
 
101
172
  Result.success({
102
173
  content: content,
@@ -106,6 +177,7 @@ module OxTenderAbstract
106
177
  rescue SocketError, Timeout::Error => e
107
178
  Result.failure("Network error: #{e.message}")
108
179
  rescue StandardError => e
180
+ log_error "Download error details: #{e.class} - #{e.message}"
109
181
  Result.failure("Download error: #{e.message}")
110
182
  end
111
183
  end
@@ -116,6 +188,10 @@ module OxTenderAbstract
116
188
  http.verify_mode = OxTenderAbstract.configuration.ssl_verify ? OpenSSL::SSL::VERIFY_PEER : OpenSSL::SSL::VERIFY_NONE
117
189
  http.open_timeout = OxTenderAbstract.configuration.timeout_open
118
190
  http.read_timeout = OxTenderAbstract.configuration.timeout_read
191
+
192
+ # Add debug logging for HTTP client configuration
193
+ log_debug "HTTP client config: SSL=#{http.use_ssl?}, verify=#{http.verify_mode}, open_timeout=#{http.open_timeout}, read_timeout=#{http.read_timeout}"
194
+
119
195
  http
120
196
  end
121
197
 
@@ -126,19 +202,23 @@ module OxTenderAbstract
126
202
  decompressed_content = gz.read
127
203
  gz.close
128
204
 
205
+ log_debug "GZIP decompression: #{gzip_content.bytesize} -> #{decompressed_content.bytesize} bytes"
206
+
129
207
  Result.success({
130
208
  content: decompressed_content,
131
209
  compressed_size: gzip_content.bytesize,
132
210
  decompressed_size: decompressed_content.bytesize
133
211
  })
134
212
  rescue Zlib::GzipFile::Error => e
213
+ log_error "GZIP decompression error: #{e.message}"
135
214
  Result.failure("GZIP decompression error: #{e.message}")
136
215
  rescue StandardError => e
216
+ log_error "Decompression error: #{e.message}"
137
217
  Result.failure("Decompression error: #{e.message}")
138
218
  end
139
219
 
140
220
  def extract_zip_from_memory(zip_content)
141
- log_debug 'Extracting ZIP archive from memory'
221
+ log_debug "Extracting ZIP archive from memory (#{zip_content.bytesize} bytes)"
142
222
 
143
223
  files = {}
144
224
  zip_io = StringIO.new(zip_content)
@@ -149,23 +229,63 @@ module OxTenderAbstract
149
229
 
150
230
  log_debug "Extracting file: #{entry.name} (#{entry.size} bytes)"
151
231
 
152
- content = entry.get_input_stream.read
153
-
154
- files[entry.name] = {
155
- content: content,
156
- size: entry.size,
157
- compressed_size: entry.compressed_size,
158
- crc: entry.crc
159
- }
232
+ begin
233
+ content = entry.get_input_stream.read
234
+
235
+ files[entry.name] = {
236
+ content: content,
237
+ size: entry.size,
238
+ compressed_size: entry.compressed_size,
239
+ crc: entry.crc
240
+ }
241
+ rescue StandardError => e
242
+ log_error "Error extracting file #{entry.name}: #{e.message}"
243
+ # Continue with other files instead of failing completely
244
+ end
160
245
  end
161
246
  end
162
247
 
163
248
  log_debug "Extracted #{files.size} files from ZIP archive"
164
249
  files
165
250
  rescue Zip::Error => e
251
+ log_error "ZIP extraction error: #{e.message}"
166
252
  raise ArchiveError, "ZIP extraction error: #{e.message}"
167
253
  rescue StandardError => e
254
+ log_error "Archive extraction error: #{e.message}"
255
+ log_error e.backtrace.first(3).join("\n") if e.backtrace
168
256
  raise ArchiveError, "Archive extraction error: #{e.message}"
169
257
  end
258
+
259
+ # Show wait progress during API block
260
+ def show_wait_progress(total_seconds)
261
+ return if total_seconds <= 0
262
+
263
+ log_info "Waiting #{total_seconds} seconds for API block to expire..."
264
+
265
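+ # For long waits, sleep in steps of 30, 60, 120, 180 and 300 s, logging after each (NOTE(review): steps are cumulative - 690 s slept for a 610 s wait, and "remaining" goes negative)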
266
+ if total_seconds > 60
267
+ intervals = [30, 60, 120, 180, 300].select { |i| i < total_seconds }
268
+
269
+ intervals.each do |interval|
270
+ sleep(interval)
271
+ remaining = total_seconds - interval
272
+ total_seconds = remaining
273
+
274
+ if remaining > 60
275
+ log_info "Still waiting... #{remaining} seconds remaining (#{(remaining / 60.0).round(1)} minutes)"
276
+ else
277
+ log_info "Still waiting... #{remaining} seconds remaining"
278
+ end
279
+ end
280
+
281
+ # Wait out whatever time is left
282
+ sleep(total_seconds) if total_seconds > 0
283
+ else
284
+ # For short intervals, simply wait
285
+ sleep(total_seconds)
286
+ end
287
+
288
+ log_info 'Wait period completed!'
289
+ end
170
290
  end
171
291
  end
@@ -77,7 +77,7 @@ module OxTenderAbstract
77
77
  # Search tenders with full workflow: API -> Archive -> Parse
78
78
  def search_tenders(org_region:, exact_date:, subsystem_type: DocumentTypes::DEFAULT_SUBSYSTEM,
79
79
  document_type: DocumentTypes::DEFAULT_DOCUMENT_TYPE)
80
- log_info "Starting tender search for region #{org_region}, date #{exact_date}"
80
+ log_info "Starting tender search for region #{org_region}, date #{exact_date}, subsystem: #{subsystem_type}, type: #{document_type}"
81
81
 
82
82
  # Step 1: Get archive URLs from API
83
83
  api_result = get_docs_by_region(
@@ -94,44 +94,69 @@ module OxTenderAbstract
94
94
 
95
95
  log_info "Found #{archive_urls.size} archives to process"
96
96
 
97
- # Step 2: Process each archive
97
+ # Step 2: Process each archive with error resilience
98
98
  all_tenders = []
99
99
  total_files = 0
100
+ processed_archives = 0
101
+ failed_archives = 0
100
102
 
101
103
  archive_urls.each_with_index do |archive_url, index|
102
104
  log_info "Processing archive #{index + 1}/#{archive_urls.size}"
103
105
 
104
- archive_result = download_archive_data(archive_url)
105
- next if archive_result.failure?
106
+ begin
107
+ archive_result = download_archive_data(archive_url)
106
108
 
107
- files = archive_result.data[:files]
108
- total_files += files.size
109
+ if archive_result.failure?
110
+ log_error "Failed to download archive #{index + 1}: #{archive_result.error}"
111
+ failed_archives += 1
112
+ next
113
+ end
109
114
 
110
- # Step 3: Parse XML files from archive
111
- xml_files = files.select { |name, _| name.downcase.end_with?('.xml') }
115
+ processed_archives += 1
116
+ files = archive_result.data[:files]
117
+ total_files += files.size
112
118
 
113
- xml_files.each do |file_name, file_data|
114
- parse_result = parse_xml_document(file_data[:content])
115
- next if parse_result.failure?
116
- next unless parse_result.data[:document_type] == :tender
119
+ # Step 3: Parse XML files from archive
120
+ xml_files = files.select { |name, _| name.downcase.end_with?('.xml') }
121
+ log_debug "Found #{xml_files.size} XML files in archive #{index + 1}"
117
122
 
118
- tender_data = parse_result.data[:content]
119
- next if tender_data[:reestr_number].nil? || tender_data[:reestr_number].empty?
123
+ xml_files.each do |file_name, file_data|
124
+ parse_result = parse_xml_document(file_data[:content])
120
125
 
121
- # Add metadata
122
- tender_data[:source_file] = file_name
123
- tender_data[:archive_url] = archive_url
124
- tender_data[:processed_at] = Time.now
126
+ if parse_result.failure?
127
+ log_debug "Failed to parse #{file_name}: #{parse_result.error}"
128
+ next
129
+ end
125
130
 
126
- all_tenders << tender_data
131
+ next unless parse_result.data[:document_type] == :tender
132
+
133
+ tender_data = parse_result.data[:content]
134
+ next if tender_data[:reestr_number].nil? || tender_data[:reestr_number].empty?
135
+
136
+ # Add metadata
137
+ tender_data[:source_file] = file_name
138
+ tender_data[:archive_url] = archive_url
139
+ tender_data[:processed_at] = Time.now
140
+
141
+ all_tenders << tender_data
142
+ rescue StandardError => e
143
+ log_error "Error processing file #{file_name}: #{e.message}"
144
+ # Continue with other files
145
+ end
146
+ rescue StandardError => e
147
+ log_error "Critical error processing archive #{index + 1}: #{e.message}"
148
+ failed_archives += 1
149
+ # Continue with other archives
127
150
  end
128
151
  end
129
152
 
130
- log_info "Search completed. Found #{all_tenders.size} tenders in #{total_files} files"
153
+ log_info "Search completed. Processed: #{processed_archives}/#{archive_urls.size} archives, Failed: #{failed_archives}, Found #{all_tenders.size} tenders in #{total_files} files"
131
154
 
132
155
  Result.success({
133
156
  tenders: all_tenders,
134
157
  total_archives: archive_urls.size,
158
+ processed_archives: processed_archives,
159
+ failed_archives: failed_archives,
135
160
  total_files: total_files,
136
161
  processed_at: Time.now
137
162
  })
@@ -211,6 +236,131 @@ module OxTenderAbstract
211
236
  })
212
237
  end
213
238
 
239
+ # Search tenders with automatic resume capability
240
+ # Allows the download to continue from where it paused when the API blocks requests
241
+ def search_tenders_with_resume(org_region:, exact_date:, subsystem_type: DocumentTypes::DEFAULT_SUBSYSTEM,
242
+ document_type: DocumentTypes::DEFAULT_DOCUMENT_TYPE,
243
+ start_from_archive: 0, resume_state: nil)
244
+ log_info "Starting tender search with resume capability for region #{org_region}, date #{exact_date}"
245
+ log_info "Starting from archive #{start_from_archive}" if start_from_archive > 0
246
+
247
+ # Restore the saved state, if any
248
+ if resume_state
249
+ log_info "Resuming from previous state: #{resume_state[:processed_archives]} archives processed"
250
+ all_tenders = resume_state[:tenders] || []
251
+ total_files = resume_state[:total_files] || 0
252
+ processed_archives = resume_state[:processed_archives] || 0
253
+ failed_archives = resume_state[:failed_archives] || 0
254
+ archive_urls = resume_state[:archive_urls]
255
+ else
256
+ # Step 1: Get archive URLs from API
257
+ api_result = get_docs_by_region(
258
+ org_region: org_region,
259
+ subsystem_type: subsystem_type,
260
+ document_type: document_type,
261
+ exact_date: exact_date
262
+ )
263
+
264
+ return api_result if api_result.failure?
265
+
266
+ archive_urls = api_result.data[:archive_urls]
267
+ return Result.success({ tenders: [], total_archives: 0, total_files: 0 }) if archive_urls.empty?
268
+
269
+ all_tenders = []
270
+ total_files = 0
271
+ processed_archives = 0
272
+ failed_archives = 0
273
+ end
274
+
275
+ log_info "Found #{archive_urls.size} archives to process (starting from #{start_from_archive})"
276
+
277
+ # Step 2: Process archives starting from specified position
278
+ (start_from_archive...archive_urls.size).each do |index|
279
+ archive_url = archive_urls[index]
280
+ log_info "Processing archive #{index + 1}/#{archive_urls.size}"
281
+
282
+ begin
283
+ archive_result = download_archive_data(archive_url)
284
+
285
+ if archive_result.failure?
286
+ # Check whether the download was blocked while automatic waiting is disabled
287
+ if archive_result.metadata[:error_type] == :blocked &&
288
+ !OxTenderAbstract.configuration.auto_wait_on_block
289
+ # Return the current state so the search can be resumed later
290
+ resume_state = {
291
+ tenders: all_tenders,
292
+ total_files: total_files,
293
+ processed_archives: processed_archives,
294
+ failed_archives: failed_archives,
295
+ archive_urls: archive_urls,
296
+ next_archive_index: index
297
+ }
298
+
299
+ return Result.failure(
300
+ "Archive download blocked, can resume from archive #{index + 1}",
301
+ error_type: :blocked,
302
+ retry_after: 600,
303
+ resume_state: resume_state
304
+ )
305
+ else
306
+ log_error "Failed to download archive #{index + 1}: #{archive_result.error}"
307
+ failed_archives += 1
308
+ next
309
+ end
310
+ end
311
+
312
+ processed_archives += 1
313
+ files = archive_result.data[:files]
314
+ total_files += files.size
315
+
316
+ # Step 3: Parse XML files from archive
317
+ xml_files = files.select { |name, _| name.downcase.end_with?('.xml') }
318
+ log_debug "Found #{xml_files.size} XML files in archive #{index + 1}"
319
+
320
+ xml_files.each do |file_name, file_data|
321
+ parse_result = parse_xml_document(file_data[:content])
322
+
323
+ if parse_result.failure?
324
+ log_debug "Failed to parse #{file_name}: #{parse_result.error}"
325
+ next
326
+ end
327
+
328
+ next unless parse_result.data[:document_type] == :tender
329
+
330
+ tender_data = parse_result.data[:content]
331
+ next if tender_data[:reestr_number].nil? || tender_data[:reestr_number].empty?
332
+
333
+ # Add metadata
334
+ tender_data[:source_file] = file_name
335
+ tender_data[:archive_url] = archive_url
336
+ tender_data[:processed_at] = Time.now
337
+ tender_data[:archive_index] = index
338
+
339
+ all_tenders << tender_data
340
+ rescue StandardError => e
341
+ log_error "Error processing file #{file_name}: #{e.message}"
342
+ # Continue with other files
343
+ end
344
+ rescue StandardError => e
345
+ log_error "Critical error processing archive #{index + 1}: #{e.message}"
346
+ failed_archives += 1
347
+ # Continue with other archives
348
+ end
349
+ end
350
+
351
+ log_info "Search completed. Processed: #{processed_archives}/#{archive_urls.size} archives, Failed: #{failed_archives}, Found #{all_tenders.size} tenders in #{total_files} files"
352
+
353
+ Result.success({
354
+ tenders: all_tenders,
355
+ total_archives: archive_urls.size,
356
+ processed_archives: processed_archives,
357
+ failed_archives: failed_archives,
358
+ total_files: total_files,
359
+ processed_at: Time.now,
360
+ completed: true
361
+ })
362
+ end
363
+
214
364
  private
215
365
 
216
366
  def validate_token!
@@ -5,7 +5,8 @@ require 'logger'
5
5
  module OxTenderAbstract
6
6
  # Configuration for the library
7
7
  class Configuration
8
- attr_accessor :token, :timeout_open, :timeout_read, :ssl_verify
8
+ attr_accessor :token, :timeout_open, :timeout_read, :ssl_verify,
9
+ :auto_wait_on_block, :block_wait_time, :max_wait_time
9
10
  attr_writer :wsdl_url, :logger
10
11
 
11
12
  def initialize
@@ -15,6 +16,9 @@ module OxTenderAbstract
15
16
  @ssl_verify = false
16
17
  @wsdl_url = nil # Will be set later
17
18
  @logger = nil # Will be set later
19
+ @auto_wait_on_block = true # Automatically wait when downloads are blocked
20
+ @block_wait_time = 610 # Wait time when blocked (10 min + 10 s)
21
+ @max_wait_time = 900 # Maximum wait time (15 min)
18
22
  end
19
23
 
20
24
  def wsdl_url
@@ -16,8 +16,8 @@ module OxTenderAbstract
16
16
  CONTRACT_EXECUTION_REPORT TENDER_NOTICE TENDER_DOCUMENTATION
17
17
  ].freeze
18
18
 
19
- # Electronic notification types
20
- ELECTRONIC_NOTIFICATION_TYPES = %w[
19
+ # Electronic notification types for 44-FZ
20
+ ELECTRONIC_NOTIFICATION_TYPES_44FZ = %w[
21
21
  epNotificationEF2020 epNotificationEF epNotificationOK2020
22
22
  epNotificationEP2020 epNotificationZK2020 epNotificationZP2020
23
23
  epNotificationISM2020 fcsNotificationEF fcsNotificationOK
@@ -25,10 +25,80 @@ module OxTenderAbstract
25
25
  fcsNotificationISM fcsPlacement fcsPlacementResult
26
26
  ].freeze
27
27
 
28
+ # Electronic notification types for 223-FZ
29
+ ELECTRONIC_NOTIFICATION_TYPES_223FZ = %w[
30
+ epNotification223 notification223 purchaseNotice223
31
+ purchaseNoticeEA223 purchaseNoticeZK223 purchaseNoticeZP223
32
+ purchaseNoticeOK223 purchaseNoticeIS223 contractNotice223
33
+ contractExecutionNotice223 purchasePlan223
34
+ ].freeze
35
+
36
+ # Electronic notification types for regional and municipal
37
+ ELECTRONIC_NOTIFICATION_TYPES_REGIONAL = %w[
38
+ epNotificationRP epNotificationRPGZ notificationRP
39
+ notificationRPGZ purchaseNoticeRP purchaseNoticeRPGZ
40
+ contractNoticeRP contractNoticeRPGZ
41
+ ].freeze
42
+
43
+ # All supported electronic notification types
44
+ ELECTRONIC_NOTIFICATION_TYPES = (
45
+ ELECTRONIC_NOTIFICATION_TYPES_44FZ +
46
+ ELECTRONIC_NOTIFICATION_TYPES_223FZ +
47
+ ELECTRONIC_NOTIFICATION_TYPES_REGIONAL
48
+ ).freeze
49
+
28
50
  # Default settings
29
51
  DEFAULT_SUBSYSTEM = 'PRIZ'
30
52
  DEFAULT_DOCUMENT_TYPE = 'epNotificationEF2020'
31
53
 
54
+ # Subsystem descriptions
55
+ SUBSYSTEM_DESCRIPTIONS = {
56
+ 'PRIZ' => '44-ФЗ - Основные закупки федеральных органов',
57
+ 'OD223' => '223-ФЗ - Закупки отдельных видов юридических лиц',
58
+ 'RD223' => '223-ФЗ - Реестр договоров',
59
+ 'RPEC' => 'Закупки субъектов РФ',
60
+ 'RPGZ' => 'Муниципальные закупки',
61
+ 'RGK' => 'Закупки государственных корпораций',
62
+ 'BTK' => 'Закупки бюджетных, автономных учреждений',
63
+ 'UR' => 'Закупки субъектов естественных монополий',
64
+ 'RJ' => 'Закупки для нужд судебной системы',
65
+ 'RDI' => 'Закупки для нужд дошкольных образовательных учреждений',
66
+ 'RPKLKP' => 'Закупки для нужд подведомственных Калининградской области',
67
+ 'RPNZ' => 'Закупки для нужд образовательных учреждений НЗО',
68
+ 'EA' => 'Электронные аукционы',
69
+ 'REC' => 'Реестр недобросовестных поставщиков',
70
+ 'RPP' => 'Реестр поставщиков',
71
+ 'RVP' => 'Реестр внутренних поставщиков',
72
+ 'RRK' => 'Реестр результатов контроля',
73
+ 'RRA' => 'Реестр результатов аудита',
74
+ 'RNP' => 'Реестр нарушений при проведении закупок',
75
+ 'RKPO' => 'Реестр контрольно-проверочных организаций'
76
+ }.freeze
77
+
78
+ # Get appropriate document types for subsystem
79
+ def self.document_types_for_subsystem(subsystem_type)
80
+ case subsystem_type
81
+ when 'PRIZ', 'RPEC', 'RPGZ', 'RGK', 'BTK', 'UR', 'RJ', 'RDI'
82
+ ELECTRONIC_NOTIFICATION_TYPES_44FZ
83
+ when 'OD223', 'RD223'
84
+ ELECTRONIC_NOTIFICATION_TYPES_223FZ + ELECTRONIC_NOTIFICATION_TYPES_44FZ
85
+ when /RP/
86
+ ELECTRONIC_NOTIFICATION_TYPES_REGIONAL + ELECTRONIC_NOTIFICATION_TYPES_44FZ
87
+ else
88
+ ELECTRONIC_NOTIFICATION_TYPES_44FZ
89
+ end
90
+ end
91
+
92
+ # Check if subsystem supports document type
93
+ def self.subsystem_supports_document_type?(subsystem_type, document_type)
94
+ document_types_for_subsystem(subsystem_type).include?(document_type)
95
+ end
96
+
97
+ # Get description for subsystem
98
+ def self.description_for_subsystem(subsystem_type)
99
+ SUBSYSTEM_DESCRIPTIONS[subsystem_type] || "Подсистема #{subsystem_type}"
100
+ end
101
+
32
102
  # API configuration
33
103
  API_CONFIG = {
34
104
  wsdl: 'https://int44.zakupki.gov.ru/eis-integration/services/getDocsIP?wsdl',
@@ -7,18 +7,30 @@ module OxTenderAbstract
7
7
  # Configuration related errors
8
8
  class ConfigurationError < Error; end
9
9
 
10
- # Network related errors
11
- class NetworkError < Error; end
10
+ # API related errors
11
+ class ApiError < Error; end
12
12
 
13
- # SOAP API related errors
14
- class SoapError < Error; end
13
+ # Archive processing errors
14
+ class ArchiveError < Error; end
15
15
 
16
- # XML parsing related errors
16
+ # XML parsing errors
17
17
  class ParseError < Error; end
18
18
 
19
- # Archive processing related errors
20
- class ArchiveError < Error; end
19
+ # Network related errors
20
+ class NetworkError < Error; end
21
+
22
# Archive download blocked error.
#
# Carries the length of the block in seconds and the time at which it ends,
# computed when the error object is created. The default of 600 seconds
# corresponds to a 10 minute block.
class ArchiveBlockedError < ArchiveError
  attr_reader :blocked_until, :retry_after_seconds

  # @param message [String] error message
  # @param retry_after_seconds [Numeric] length of the block in seconds
  def initialize(message = 'Archive download blocked', retry_after_seconds = 600)
    super(message)
    @blocked_until = Time.now + retry_after_seconds
    @retry_after_seconds = retry_after_seconds
  end

  # Time at which a retry may be attempted; the same value as #blocked_until.
  alias can_retry_at blocked_until
end
24
36
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module OxTenderAbstract
4
- VERSION = '0.9.2'
4
+ VERSION = '0.9.3'
5
5
  end
@@ -486,36 +486,57 @@ module OxTenderAbstract
486
486
  total_sum = nil
487
487
 
488
488
  begin
489
- # Find purchase objects nodes - use more defensive approach
489
+ # Find purchase objects nodes - including drug and non-drug objects
490
+ # Regular purchase objects
490
491
  purchase_object_nodes = doc.xpath(
491
492
  '//ns5:purchaseObjectsInfo//ns4:purchaseObject | //purchaseObjectsInfo//purchaseObject', namespaces
492
493
  )
493
494
 
495
+ # Drug purchase objects (лекарственные препараты)
496
+ drug_object_nodes = doc.xpath(
497
+ '//ns5:drugPurchaseObjectsInfo//ns4:drugPurchaseObjectInfo | //drugPurchaseObjectsInfo//drugPurchaseObjectInfo', namespaces
498
+ )
499
+
500
+ # Process regular purchase objects
494
501
  purchase_objects = purchase_object_nodes.map do |object_node|
495
502
  extract_purchase_object_data(object_node, namespaces)
496
503
  end.compact
497
504
 
498
- # Extract total sum from purchaseObjectsInfo
505
+ # Process drug purchase objects
506
+ drug_objects = drug_object_nodes.map do |drug_node|
507
+ extract_drug_purchase_object_data(drug_node, namespaces)
508
+ end.compact
509
+
510
+ # Combine all objects
511
+ all_objects = purchase_objects + drug_objects
512
+
513
+ # Extract total sum from various sources
499
514
  total_sum = extract_price_from_text(find_text_with_namespaces(doc, [
500
515
  '//ns5:purchaseObjectsInfo//ns4:totalSum',
501
516
  '//purchaseObjectsInfo//totalSum',
502
517
  '//ns5:notDrugPurchaseObjectsInfo/ns4:totalSum',
503
- '//notDrugPurchaseObjectsInfo/totalSum'
518
+ '//notDrugPurchaseObjectsInfo/totalSum',
519
+ '//ns5:drugPurchaseObjectsInfo/ns4:total',
520
+ '//drugPurchaseObjectsInfo/total'
504
521
  ], namespaces))
505
522
 
506
523
  # Extract quantity undefined flag
507
524
  quantity_undefined = find_text_with_namespaces(doc, [
508
525
  '//ns5:purchaseObjectsInfo//ns5:quantityUndefined',
509
- '//purchaseObjectsInfo//quantityUndefined'
526
+ '//purchaseObjectsInfo//quantityUndefined',
527
+ '//ns5:drugPurchaseObjectsInfo//ns5:quantityUndefined',
528
+ '//drugPurchaseObjectsInfo//quantityUndefined'
510
529
  ], namespaces) == 'true'
511
530
 
512
- return {} if purchase_objects.empty? && total_sum.nil?
531
+ return {} if all_objects.empty? && total_sum.nil?
513
532
 
514
533
  {
515
- objects: purchase_objects,
516
- objects_count: purchase_objects.size,
534
+ objects: all_objects,
535
+ objects_count: all_objects.size,
517
536
  total_sum: total_sum,
518
- quantity_undefined: quantity_undefined
537
+ quantity_undefined: quantity_undefined,
538
+ drug_objects_count: drug_objects.size,
539
+ regular_objects_count: purchase_objects.size
519
540
  }.compact
520
541
  rescue StandardError => e
521
542
  log_debug "Error extracting purchase objects: #{e.message}"
@@ -632,6 +653,106 @@ module OxTenderAbstract
632
653
  object_data.compact
633
654
  end
634
655
 
656
# Builds a hash describing one drug purchase object.
#
# Reads the scalar fields first (sid, external sid, name, price, quantity, sum,
# hierarchy type), then the optional INN, dosage form, OKPD2 and OKEI blocks
# and the characteristics list, and finally derives :product_name. Top-level
# keys whose value is nil are dropped before the hash is returned.
#
# @param drug_node [#at_xpath, #xpath] node of a single drug purchase object
# @param namespaces [Object] namespace mapping passed through to the XPath
#   lookups made directly on the nodes
# @return [Hash] extracted data; :type is always 'drug' and :name_type is
#   always 'drug_name'
def extract_drug_purchase_object_data(drug_node, namespaces)
  # Reader for the recurring "code + name" reference blocks.
  code_and_name = lambda do |node, code_path, name_path|
    {
      code: extract_text_from_node(node, code_path),
      name: extract_text_from_node(node, name_path)
    }
  end

  result = {}
  result[:sid] = extract_text_from_node(drug_node, './/ns4:sid | .//sid')
  result[:external_sid] = extract_text_from_node(drug_node, './/ns4:externalSid | .//externalSid')
  result[:name] = extract_text_from_node(drug_node, './/ns4:name | .//name')
  result[:price] = extract_price_from_text(extract_text_from_node(drug_node, './/ns4:price | .//price'))

  quantity_text = extract_text_from_node(drug_node, './/ns4:quantity/ns4:value | .//quantity/value')
  result[:quantity] = quantity_text&.to_i

  result[:sum] = extract_price_from_text(extract_text_from_node(drug_node, './/ns4:sum | .//sum'))
  result[:type] = 'drug' # marks the entry as a drug object
  result[:hierarchy_type] = extract_text_from_node(drug_node, './/ns4:hierarchyType | .//hierarchyType')

  # INN (International Nonproprietary Name)
  inn = drug_node.at_xpath('.//ns4:INN | .//INN', namespaces)
  result[:inn] = code_and_name.call(inn, './/ns2:code | .//code', './/ns2:name | .//name') if inn

  # Dosage form
  dosage_form = drug_node.at_xpath('.//ns4:dosageForm | .//dosageForm', namespaces)
  if dosage_form
    result[:dosage_form] = code_and_name.call(dosage_form, './/ns2:code | .//code', './/ns2:name | .//name')
  end

  # OKPD2 classification
  okpd2 = drug_node.at_xpath('.//ns4:OKPD2 | .//OKPD2', namespaces)
  if okpd2
    result[:okpd2] = code_and_name.call(okpd2, './/ns2:OKPDCode | .//OKPDCode', './/ns2:OKPDName | .//OKPDName')
  end

  # OKEI (unit of measurement)
  okei = drug_node.at_xpath('.//ns4:OKEI | .//OKEI', namespaces)
  if okei
    result[:okei] = {
      code: extract_text_from_node(okei, './/ns2:code | .//code'),
      national_code: extract_text_from_node(okei, './/ns2:nationalCode | .//nationalCode'),
      name: extract_text_from_node(okei, './/ns2:name | .//name')
    }
  end

  # Characteristics: reference-based entries first, then free-text ones.
  reference_characteristics = drug_node.xpath(
    './/ns4:characteristics//ns4:characteristicsUsingReferenceInfo | .//characteristics//characteristicsUsingReferenceInfo', namespaces
  )
  text_characteristics = drug_node.xpath(
    './/ns4:characteristics//ns4:characteristicsUsingTextForm | .//characteristics//characteristicsUsingTextForm', namespaces
  )
  characteristics = reference_characteristics + text_characteristics

  if characteristics.any?
    details = characteristics.map do |characteristic|
      entry = {
        name: extract_text_from_node(characteristic, './/ns4:name | .//name'),
        type: extract_text_from_node(characteristic, './/ns4:type | .//type')
      }

      value_nodes = characteristic.xpath('.//ns4:values/ns4:value | .//values/value', namespaces)
      if value_nodes.any?
        # Prefer the quality description, fall back to the plain text value.
        value_texts = value_nodes.map do |value_node|
          extract_text_from_node(value_node, './/ns4:qualityDescription | .//qualityDescription') ||
            extract_text_from_node(value_node, './/ns4:textValue | .//textValue')
        end
        entry[:values] = value_texts.compact
      end

      entry
    end

    result[:characteristics] = { count: characteristics.size, details: details }
  end

  # Display name: own name, else INN name, else OKPD2 name, else a placeholder.
  name_candidates = [result[:name], result.dig(:inn, :name), result.dig(:okpd2, :name)]
  result[:product_name] = name_candidates.find { |candidate| candidate && !candidate.empty? } || 'Unknown drug'
  result[:name_type] = 'drug_name'

  result.compact
end
755
+
635
756
  private
636
757
 
637
758
  def determine_name_type(name)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ox-tender-abstract
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.2
4
+ version: 0.9.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - smolev