ox-tender-abstract 0.9.2 → 0.9.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c1e519805302809f117b67e75099300485fc5910038e6ab6012eb9a4343f8e0b
4
- data.tar.gz: ffccc77f4305ca644cbe5128a2f1178e8ff30170b25180686fc3db12d5a19761
3
+ metadata.gz: cf4b74ab15f633cbfb3ad65d37a5fa900016e45e01f1bb300284b28609d805d7
4
+ data.tar.gz: 53564ab6b3af3c30998b241ee69a899875394833bdce28f2e2ad765edd81dfca
5
5
  SHA512:
6
- metadata.gz: d062a1e8139143c3c86490c54b9e367f9e6053d27e2a981f8202b2496d187d56f11e2b1b2f101c1c3b9ae69045e5388f34365f9d88e77f480177603c8fd7a1f8
7
- data.tar.gz: 57710ad7e471c7165d2878b7ffcbac5a2fbd6ffba10bc9c6c2a47af4015ca639df1d53f1cf89956858838dce7d18efdfca811a4d5f5bef2d53fb46c6a4fa14b7
6
+ metadata.gz: d122d953f825672d6b61d551f49c08a669eaeb4ef60b55d4c718c4d9dd9ee68e99d83a026745ff77451963ed30957a0ef684bcb67300574dab9f7e18e64e22c7
7
+ data.tar.gz: 7396189e5e44a5c3fa5b9e988b6ed0394072b1b7fadacfed60df6ebe36afd741685efa3f46978912028c561f4c0aa5820a85560bf5222134fab840815a88c25a
data/CHANGELOG.md CHANGED
@@ -1,3 +1,18 @@
1
+ ## [0.9.4] - 2025-07-28
2
+
3
+ - Added the `include_attachments` option (default `true`) to the tender search methods
4
+
5
+ ## [0.9.3] - 2025-07-27
6
+
7
+ - Added support for parsing tender documents
8
+ - Added support for parsing contract documents
9
+ - Added support for parsing organization documents
10
+ - Added support for parsing generic documents
11
+ - Added support for parsing attachments
12
+ - Added support for parsing tender documents
13
+ - Added support for parsing contract documents
14
+ - Added support for parsing organization documents
15
+
1
16
  ## [0.9.0] - 2025-07-15
2
17
 
3
18
  - Initial release
data/README.md CHANGED
@@ -273,6 +273,41 @@ puts result.data[:total_archives] # => 6
273
273
  # Processing typically takes 10-15 seconds for a full day's data
274
274
  ```
275
275
 
276
+ ## Error Handling
277
+
278
+ The library uses the `Result` pattern for error handling:
279
+
280
+ ```ruby
281
+ result = OxTenderAbstract.search_tenders(org_region: '77', exact_date: '2024-01-01')
282
+
283
+ if result.success?
284
+ puts "Found tenders: #{result.data[:tenders].size}"
285
+ else
286
+ puts "Error: #{result.error}"
287
+
288
+ # Check error type for special handling
289
+ if result.metadata[:error_type] == :blocked
290
+ retry_after = result.metadata[:retry_after] || 600
291
+ puts "API blocked for #{retry_after} seconds"
292
+ end
293
+ end
294
+ ```
295
+
296
+ ### Handling API Blocks
297
+
298
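+ When making frequent requests, the API may block archive downloads for 10 minutes. By default (`auto_wait_on_block = true`) the library waits `block_wait_time` seconds (610 by default) and then retries the download; set `auto_wait_on_block` to `false` to receive a `:blocked` failure instead: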
299
+
300
+ ```ruby
301
+ result = OxTenderAbstract.search_tenders(org_region: '77', exact_date: '2024-01-01')
302
+
303
+ if result.failure? && result.metadata[:error_type] == :blocked
304
+ retry_after = result.metadata[:retry_after] # 600 seconds (10 minutes)
305
+ puts "Download blocked, retry in #{retry_after} seconds"
306
+ end
307
+ ```
308
+
309
+ For detailed guidance on using with Sidekiq background jobs, see [SIDEKIQ_USAGE.md](SIDEKIQ_USAGE.md).
310
+
276
311
  ## Requirements
277
312
 
278
313
  - Ruby >= 3.0.0
@@ -1,39 +1,178 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative 'oxtenderabstract/version'
4
- require_relative 'oxtenderabstract/logger'
4
+ require_relative 'oxtenderabstract/configuration'
5
5
  require_relative 'oxtenderabstract/errors'
6
+ require_relative 'oxtenderabstract/logger'
6
7
  require_relative 'oxtenderabstract/result'
7
8
  require_relative 'oxtenderabstract/document_types'
8
- require_relative 'oxtenderabstract/configuration'
9
- require_relative 'oxtenderabstract/xml_parser'
10
9
  require_relative 'oxtenderabstract/archive_processor'
10
+ require_relative 'oxtenderabstract/xml_parser'
11
11
  require_relative 'oxtenderabstract/client'
12
12
 
13
13
  # Main module for OxTenderAbstract library
14
14
  module OxTenderAbstract
15
15
  class Error < StandardError; end
16
16
 
17
- # Convenience method to create a new client
18
- def self.client(token: nil)
19
- Client.new(token: token)
20
- end
17
+ class << self
18
+ def configure
19
+ yield(configuration)
20
+ end
21
21
 
22
- # Search tenders by region and date (convenience method)
23
- def self.search_tenders(org_region:, exact_date:, token: nil, **options)
24
- client = Client.new(token: token)
25
- client.search_tenders(org_region: org_region, exact_date: exact_date, **options)
26
- end
22
+ def configuration
23
+ @configuration ||= Configuration.new
24
+ end
27
25
 
28
- # Enhanced search tenders with detailed information (convenience method)
29
- def self.enhanced_search_tenders(org_region:, exact_date:, token: nil, **options)
30
- client = Client.new(token: token)
31
- client.enhanced_search_tenders(org_region: org_region, exact_date: exact_date, **options)
32
- end
26
+ def reset_configuration!
27
+ @configuration = nil
28
+ end
29
+
30
+ # Convenience method for searching tenders in specific subsystem
31
+ def search_tenders(org_region:, exact_date:, subsystem_type: DocumentTypes::DEFAULT_SUBSYSTEM,
32
+ document_type: DocumentTypes::DEFAULT_DOCUMENT_TYPE, include_attachments: true)
33
+ client = Client.new
34
+ client.search_tenders(
35
+ org_region: org_region,
36
+ exact_date: exact_date,
37
+ subsystem_type: subsystem_type,
38
+ document_type: document_type,
39
+ include_attachments: include_attachments
40
+ )
41
+ end
42
+
43
+ # Enhanced method for searching tenders across multiple subsystems
44
+ def search_all_tenders(org_region:, exact_date:, subsystems: nil, document_types: nil, include_attachments: true)
45
+ # Default subsystems to search
46
+ subsystems ||= %w[PRIZ RPEC RPGZ BTK UR RGK OD223 RD223]
47
+
48
+ client = Client.new
49
+ all_results = {}
50
+ total_tenders = []
51
+ total_archives = 0
52
+
53
+ subsystems.each do |subsystem_type|
54
+ # Get appropriate document types for this subsystem
55
+ available_types = DocumentTypes.document_types_for_subsystem(subsystem_type)
56
+ test_types = document_types || [available_types.first] # Test first type by default
57
+
58
+ subsystem_results = {
59
+ subsystem: subsystem_type,
60
+ description: DocumentTypes.description_for_subsystem(subsystem_type),
61
+ tenders: [],
62
+ archives: 0,
63
+ errors: []
64
+ }
65
+
66
+ test_types.each do |doc_type|
67
+ result = client.search_tenders(
68
+ org_region: org_region,
69
+ exact_date: exact_date,
70
+ subsystem_type: subsystem_type,
71
+ document_type: doc_type,
72
+ include_attachments: include_attachments
73
+ )
74
+
75
+ if result.success?
76
+ tenders = result.data[:tenders] || []
77
+ archives = result.data[:total_archives] || 0
78
+
79
+ subsystem_results[:tenders].concat(tenders)
80
+ subsystem_results[:archives] += archives
81
+ total_archives += archives
82
+
83
+ # Add subsystem info to each tender
84
+ tenders.each do |tender|
85
+ tender[:subsystem_type] = subsystem_type
86
+ tender[:subsystem_description] = DocumentTypes.description_for_subsystem(subsystem_type)
87
+ tender[:document_type_used] = doc_type
88
+ end
89
+
90
+ total_tenders.concat(tenders)
91
+ else
92
+ subsystem_results[:errors] << "#{doc_type}: #{result.error}"
93
+ end
94
+ rescue StandardError => e
95
+ subsystem_results[:errors] << "#{doc_type}: #{e.message}"
96
+ end
97
+
98
+ all_results[subsystem_type] = subsystem_results
99
+ end
100
+
101
+ Result.success({
102
+ tenders: total_tenders,
103
+ total_archives: total_archives,
104
+ subsystem_results: all_results,
105
+ search_params: {
106
+ org_region: org_region,
107
+ exact_date: exact_date,
108
+ subsystems_searched: subsystems.size
109
+ },
110
+ processed_at: Time.now
111
+ })
112
+ end
113
+
114
+ # Get documents by registry number across subsystems
115
+ def get_docs_by_reestr_number(reestr_number:, subsystem_type: DocumentTypes::DEFAULT_SUBSYSTEM)
116
+ client = Client.new
117
+ client.get_docs_by_reestr_number(
118
+ reestr_number: reestr_number,
119
+ subsystem_type: subsystem_type
120
+ )
121
+ end
122
+
123
+ # Enhanced search with detailed information extraction
124
+ def enhanced_search_tenders(org_region:, exact_date:, subsystem_type: DocumentTypes::DEFAULT_SUBSYSTEM,
125
+ document_type: DocumentTypes::DEFAULT_DOCUMENT_TYPE,
126
+ include_attachments: true)
127
+ client = Client.new
128
+ client.enhanced_search_tenders(
129
+ org_region: org_region,
130
+ exact_date: exact_date,
131
+ subsystem_type: subsystem_type,
132
+ document_type: document_type,
133
+ include_attachments: include_attachments
134
+ )
135
+ end
33
136
 
34
- # Get documents by registry number (convenience method)
35
- def self.get_docs_by_reestr_number(reestr_number:, token: nil, **options)
36
- client = Client.new(token: token)
37
- client.get_docs_by_reestr_number(reestr_number: reestr_number, **options)
137
+ # Search tenders with automatic wait on API blocks and resume capability
138
+ def search_tenders_with_auto_wait(org_region:, exact_date:, subsystem_type: DocumentTypes::DEFAULT_SUBSYSTEM,
139
+ document_type: DocumentTypes::DEFAULT_DOCUMENT_TYPE, resume_state: nil,
140
+ include_attachments: true)
141
+ client = Client.new
142
+
143
+ # Если есть состояние для продолжения
144
+ if resume_state
145
+ start_from = resume_state[:next_archive_index] || 0
146
+ client.search_tenders_with_resume(
147
+ org_region: org_region,
148
+ exact_date: exact_date,
149
+ subsystem_type: subsystem_type,
150
+ document_type: document_type,
151
+ start_from_archive: start_from,
152
+ resume_state: resume_state,
153
+ include_attachments: include_attachments
154
+ )
155
+ else
156
+ # Используем обычный метод если авто-ожидание включено
157
+ if configuration.auto_wait_on_block
158
+ client.search_tenders(
159
+ org_region: org_region,
160
+ exact_date: exact_date,
161
+ subsystem_type: subsystem_type,
162
+ document_type: document_type,
163
+ include_attachments: include_attachments
164
+ )
165
+ else
166
+ # Используем метод с возможностью продолжения
167
+ client.search_tenders_with_resume(
168
+ org_region: org_region,
169
+ exact_date: exact_date,
170
+ subsystem_type: subsystem_type,
171
+ document_type: document_type,
172
+ include_attachments: include_attachments
173
+ )
174
+ end
175
+ end
176
+ end
38
177
  end
39
178
  end
@@ -13,6 +13,8 @@ module OxTenderAbstract
13
13
  include ContextualLogger
14
14
 
15
15
  MAX_FILE_SIZE_BYTES = 100 * 1024 * 1024 # 100 MB in bytes
16
+ MAX_RETRY_ATTEMPTS = 3
17
+ RETRY_DELAY_SECONDS = 2
16
18
 
17
19
  def initialize
18
20
  # Archive processor initialization
@@ -23,8 +25,8 @@ module OxTenderAbstract
23
25
  return Result.failure('Empty archive URL') if archive_url.nil? || archive_url.empty?
24
26
 
25
27
  begin
26
- # Download archive to memory
27
- download_result = download_to_memory(archive_url)
28
+ # Download archive to memory with retry logic
29
+ download_result = download_with_retry(archive_url)
28
30
  return download_result if download_result.failure?
29
31
 
30
32
  content = download_result.data[:content]
@@ -56,15 +58,52 @@ module OxTenderAbstract
56
58
  file_count: zip_result.size
57
59
  })
58
60
  else
61
+ # Log first bytes for debugging
62
+ log_error "Unknown archive format. First 10 bytes: #{content[0..9].unpack1('H*')}"
59
63
  Result.failure('Unknown archive format (not GZIP and not ZIP)')
60
64
  end
61
65
  rescue StandardError => e
66
+ log_error "Archive processing error: #{e.message}"
67
+ log_error e.backtrace.first(3).join("\n") if e.backtrace
62
68
  Result.failure("Archive processing error: #{e.message}")
63
69
  end
64
70
  end
65
71
 
66
72
  private
67
73
 
74
+ def download_with_retry(archive_url)
75
+ attempt = 1
76
+ last_error = nil
77
+
78
+ while attempt <= MAX_RETRY_ATTEMPTS
79
+ begin
80
+ log_info "Download attempt #{attempt}/#{MAX_RETRY_ATTEMPTS} for archive"
81
+ result = download_to_memory(archive_url)
82
+
83
+ if result.success?
84
+ log_info "Download successful on attempt #{attempt}"
85
+ return result
86
+ else
87
+ last_error = result.error
88
+ log_warn "Download attempt #{attempt} failed: #{last_error}"
89
+ end
90
+ rescue StandardError => e
91
+ last_error = e.message
92
+ log_error "Download attempt #{attempt} exception: #{last_error}"
93
+ end
94
+
95
+ if attempt < MAX_RETRY_ATTEMPTS
96
+ sleep_time = RETRY_DELAY_SECONDS * attempt
97
+ log_info "Waiting #{sleep_time} seconds before retry..."
98
+ sleep(sleep_time)
99
+ end
100
+
101
+ attempt += 1
102
+ end
103
+
104
+ Result.failure("Download failed after #{MAX_RETRY_ATTEMPTS} attempts. Last error: #{last_error}")
105
+ end
106
+
68
107
  def download_to_memory(url)
69
108
  begin
70
109
  uri = URI.parse(url)
@@ -81,12 +120,42 @@ module OxTenderAbstract
81
120
  request['User-Agent'] = "OxTenderAbstract/#{OxTenderAbstract::VERSION}"
82
121
  request['individualPerson_token'] = OxTenderAbstract.configuration.token
83
122
 
84
- log_debug "Downloading archive from: #{url}"
123
+ log_debug "Downloading archive from: #{url[0..100]}..."
85
124
 
86
125
  response = http.request(request)
87
126
 
127
+ # Enhanced error handling with response details
88
128
  unless response.is_a?(Net::HTTPSuccess)
89
- return Result.failure("HTTP error: #{response.code} #{response.message}")
129
+ error_msg = "HTTP error: #{response.code} #{response.message}"
130
+ if response.body && !response.body.empty?
131
+ # Log first part of response body for debugging
132
+ body_preview = response.body[0..500]
133
+ log_error "Response body preview: #{body_preview}"
134
+ error_msg += ". Response: #{body_preview[0..100]}"
135
+ end
136
+ return Result.failure(error_msg)
137
+ end
138
+
139
+ # Check for download blocking message in successful response
140
+ if response.body&.include?('Скачивание архива по данной ссылке заблокировано')
141
+ if OxTenderAbstract.configuration.auto_wait_on_block
142
+ wait_time = OxTenderAbstract.configuration.block_wait_time
143
+ log_error "Archive download blocked. Auto-waiting for #{wait_time} seconds..."
144
+
145
+ # Показываем прогресс ожидания
146
+ show_wait_progress(wait_time)
147
+
148
+ log_info 'Wait completed, retrying download...'
149
+ # Рекурсивно повторяем попытку после ожидания
150
+ return download_to_memory(url)
151
+ else
152
+ log_error 'Archive download blocked for 10 minutes'
153
+ return Result.failure(
154
+ 'Archive download blocked for 10 minutes',
155
+ error_type: :blocked,
156
+ retry_after: 600
157
+ )
158
+ end
90
159
  end
91
160
 
92
161
  content = response.body
@@ -96,7 +165,9 @@ module OxTenderAbstract
96
165
  return Result.failure("Archive too large: #{size} bytes (max: #{MAX_FILE_SIZE_BYTES})")
97
166
  end
98
167
 
99
- log_debug "Downloaded archive: #{size} bytes"
168
+ return Result.failure('Empty archive downloaded') if size == 0
169
+
170
+ log_debug "Downloaded archive: #{size} bytes, content-type: #{response['content-type']}"
100
171
 
101
172
  Result.success({
102
173
  content: content,
@@ -106,6 +177,7 @@ module OxTenderAbstract
106
177
  rescue SocketError, Timeout::Error => e
107
178
  Result.failure("Network error: #{e.message}")
108
179
  rescue StandardError => e
180
+ log_error "Download error details: #{e.class} - #{e.message}"
109
181
  Result.failure("Download error: #{e.message}")
110
182
  end
111
183
  end
@@ -116,6 +188,10 @@ module OxTenderAbstract
116
188
  http.verify_mode = OxTenderAbstract.configuration.ssl_verify ? OpenSSL::SSL::VERIFY_PEER : OpenSSL::SSL::VERIFY_NONE
117
189
  http.open_timeout = OxTenderAbstract.configuration.timeout_open
118
190
  http.read_timeout = OxTenderAbstract.configuration.timeout_read
191
+
192
+ # Add debug logging for HTTP client configuration
193
+ log_debug "HTTP client config: SSL=#{http.use_ssl?}, verify=#{http.verify_mode}, open_timeout=#{http.open_timeout}, read_timeout=#{http.read_timeout}"
194
+
119
195
  http
120
196
  end
121
197
 
@@ -126,19 +202,23 @@ module OxTenderAbstract
126
202
  decompressed_content = gz.read
127
203
  gz.close
128
204
 
205
+ log_debug "GZIP decompression: #{gzip_content.bytesize} -> #{decompressed_content.bytesize} bytes"
206
+
129
207
  Result.success({
130
208
  content: decompressed_content,
131
209
  compressed_size: gzip_content.bytesize,
132
210
  decompressed_size: decompressed_content.bytesize
133
211
  })
134
212
  rescue Zlib::GzipFile::Error => e
213
+ log_error "GZIP decompression error: #{e.message}"
135
214
  Result.failure("GZIP decompression error: #{e.message}")
136
215
  rescue StandardError => e
216
+ log_error "Decompression error: #{e.message}"
137
217
  Result.failure("Decompression error: #{e.message}")
138
218
  end
139
219
 
140
220
  def extract_zip_from_memory(zip_content)
141
- log_debug 'Extracting ZIP archive from memory'
221
+ log_debug "Extracting ZIP archive from memory (#{zip_content.bytesize} bytes)"
142
222
 
143
223
  files = {}
144
224
  zip_io = StringIO.new(zip_content)
@@ -149,23 +229,63 @@ module OxTenderAbstract
149
229
 
150
230
  log_debug "Extracting file: #{entry.name} (#{entry.size} bytes)"
151
231
 
152
- content = entry.get_input_stream.read
153
-
154
- files[entry.name] = {
155
- content: content,
156
- size: entry.size,
157
- compressed_size: entry.compressed_size,
158
- crc: entry.crc
159
- }
232
+ begin
233
+ content = entry.get_input_stream.read
234
+
235
+ files[entry.name] = {
236
+ content: content,
237
+ size: entry.size,
238
+ compressed_size: entry.compressed_size,
239
+ crc: entry.crc
240
+ }
241
+ rescue StandardError => e
242
+ log_error "Error extracting file #{entry.name}: #{e.message}"
243
+ # Continue with other files instead of failing completely
244
+ end
160
245
  end
161
246
  end
162
247
 
163
248
  log_debug "Extracted #{files.size} files from ZIP archive"
164
249
  files
165
250
  rescue Zip::Error => e
251
+ log_error "ZIP extraction error: #{e.message}"
166
252
  raise ArchiveError, "ZIP extraction error: #{e.message}"
167
253
  rescue StandardError => e
254
+ log_error "Archive extraction error: #{e.message}"
255
+ log_error e.backtrace.first(3).join("\n") if e.backtrace
168
256
  raise ArchiveError, "Archive extraction error: #{e.message}"
169
257
  end
258
+
259
+ # Show wait progress during API block
260
+ def show_wait_progress(total_seconds)
261
+ return if total_seconds <= 0
262
+
263
+ log_info "Waiting #{total_seconds} seconds for API block to expire..."
264
+
265
+ # Показываем прогресс каждые 30 секунд для больших интервалов
266
+ if total_seconds > 60
267
+ intervals = [30, 60, 120, 180, 300].select { |i| i < total_seconds }
268
+
269
+ intervals.each do |interval|
270
+ sleep(interval)
271
+ remaining = total_seconds - interval
272
+ total_seconds = remaining
273
+
274
+ if remaining > 60
275
+ log_info "Still waiting... #{remaining} seconds remaining (#{(remaining / 60.0).round(1)} minutes)"
276
+ else
277
+ log_info "Still waiting... #{remaining} seconds remaining"
278
+ end
279
+ end
280
+
281
+ # Ждем оставшееся время
282
+ sleep(total_seconds) if total_seconds > 0
283
+ else
284
+ # Для коротких интервалов просто ждем
285
+ sleep(total_seconds)
286
+ end
287
+
288
+ log_info 'Wait period completed!'
289
+ end
170
290
  end
171
291
  end
@@ -76,8 +76,8 @@ module OxTenderAbstract
76
76
 
77
77
  # Search tenders with full workflow: API -> Archive -> Parse
78
78
  def search_tenders(org_region:, exact_date:, subsystem_type: DocumentTypes::DEFAULT_SUBSYSTEM,
79
- document_type: DocumentTypes::DEFAULT_DOCUMENT_TYPE)
80
- log_info "Starting tender search for region #{org_region}, date #{exact_date}"
79
+ document_type: DocumentTypes::DEFAULT_DOCUMENT_TYPE, include_attachments: true)
80
+ log_info "Starting tender search for region #{org_region}, date #{exact_date}, subsystem: #{subsystem_type}, type: #{document_type}"
81
81
 
82
82
  # Step 1: Get archive URLs from API
83
83
  api_result = get_docs_by_region(
@@ -94,44 +94,78 @@ module OxTenderAbstract
94
94
 
95
95
  log_info "Found #{archive_urls.size} archives to process"
96
96
 
97
- # Step 2: Process each archive
97
+ # Step 2: Process each archive with error resilience
98
98
  all_tenders = []
99
99
  total_files = 0
100
+ processed_archives = 0
101
+ failed_archives = 0
100
102
 
101
103
  archive_urls.each_with_index do |archive_url, index|
102
104
  log_info "Processing archive #{index + 1}/#{archive_urls.size}"
103
105
 
104
- archive_result = download_archive_data(archive_url)
105
- next if archive_result.failure?
106
+ begin
107
+ archive_result = download_archive_data(archive_url)
106
108
 
107
- files = archive_result.data[:files]
108
- total_files += files.size
109
+ if archive_result.failure?
110
+ log_error "Failed to download archive #{index + 1}: #{archive_result.error}"
111
+ failed_archives += 1
112
+ next
113
+ end
109
114
 
110
- # Step 3: Parse XML files from archive
111
- xml_files = files.select { |name, _| name.downcase.end_with?('.xml') }
115
+ processed_archives += 1
116
+ files = archive_result.data[:files]
117
+ total_files += files.size
112
118
 
113
- xml_files.each do |file_name, file_data|
114
- parse_result = parse_xml_document(file_data[:content])
115
- next if parse_result.failure?
116
- next unless parse_result.data[:document_type] == :tender
119
+ # Step 3: Parse XML files from archive
120
+ xml_files = files.select { |name, _| name.downcase.end_with?('.xml') }
121
+ log_debug "Found #{xml_files.size} XML files in archive #{index + 1}"
117
122
 
118
- tender_data = parse_result.data[:content]
119
- next if tender_data[:reestr_number].nil? || tender_data[:reestr_number].empty?
123
+ xml_files.each do |file_name, file_data|
124
+ parse_result = parse_xml_document(file_data[:content])
120
125
 
121
- # Add metadata
122
- tender_data[:source_file] = file_name
123
- tender_data[:archive_url] = archive_url
124
- tender_data[:processed_at] = Time.now
126
+ if parse_result.failure?
127
+ log_debug "Failed to parse #{file_name}: #{parse_result.error}"
128
+ next
129
+ end
125
130
 
126
- all_tenders << tender_data
131
+ next unless parse_result.data[:document_type] == :tender
132
+
133
+ tender_data = parse_result.data[:content]
134
+ next if tender_data[:reestr_number].nil? || tender_data[:reestr_number].empty?
135
+
136
+ # Extract attachments if requested
137
+ if include_attachments
138
+ attachments_result = extract_attachments_from_xml(file_data[:content])
139
+ if attachments_result.success?
140
+ tender_data[:attachments] = attachments_result.data[:attachments]
141
+ tender_data[:attachments_count] = attachments_result.data[:total_count]
142
+ end
143
+ end
144
+
145
+ # Add metadata
146
+ tender_data[:source_file] = file_name
147
+ tender_data[:archive_url] = archive_url
148
+ tender_data[:processed_at] = Time.now
149
+
150
+ all_tenders << tender_data
151
+ rescue StandardError => e
152
+ log_error "Error processing file #{file_name}: #{e.message}"
153
+ # Continue with other files
154
+ end
155
+ rescue StandardError => e
156
+ log_error "Critical error processing archive #{index + 1}: #{e.message}"
157
+ failed_archives += 1
158
+ # Continue with other archives
127
159
  end
128
160
  end
129
161
 
130
- log_info "Search completed. Found #{all_tenders.size} tenders in #{total_files} files"
162
+ log_info "Search completed. Processed: #{processed_archives}/#{archive_urls.size} archives, Failed: #{failed_archives}, Found #{all_tenders.size} tenders in #{total_files} files"
131
163
 
132
164
  Result.success({
133
165
  tenders: all_tenders,
134
166
  total_archives: archive_urls.size,
167
+ processed_archives: processed_archives,
168
+ failed_archives: failed_archives,
135
169
  total_files: total_files,
136
170
  processed_at: Time.now
137
171
  })
@@ -211,6 +245,140 @@ module OxTenderAbstract
211
245
  })
212
246
  end
213
247
 
248
+ # Search tenders with automatic resume capability: when a download fails as :blocked and auto_wait_on_block is off, returns a failure whose metadata carries :resume_state; pass that state back (with start_from_archive) to continue
249
+ # Allows the download to continue from where it paused when the API blocks
250
+ def search_tenders_with_resume(org_region:, exact_date:, subsystem_type: DocumentTypes::DEFAULT_SUBSYSTEM,
251
+ document_type: DocumentTypes::DEFAULT_DOCUMENT_TYPE,
252
+ start_from_archive: 0, resume_state: nil, include_attachments: true)
253
+ log_info "Starting tender search with resume capability for region #{org_region}, date #{exact_date}"
254
+ log_info "Starting from archive #{start_from_archive}" if start_from_archive > 0
255
+
256
+ # Restore the saved state, if any (NOTE(review): :archive_urls is read without a nil fallback — confirm callers always supply it)
257
+ if resume_state
258
+ log_info "Resuming from previous state: #{resume_state[:processed_archives]} archives processed"
259
+ all_tenders = resume_state[:tenders] || []
260
+ total_files = resume_state[:total_files] || 0
261
+ processed_archives = resume_state[:processed_archives] || 0
262
+ failed_archives = resume_state[:failed_archives] || 0
263
+ archive_urls = resume_state[:archive_urls]
264
+ else
265
+ # Step 1: Get archive URLs from API
266
+ api_result = get_docs_by_region(
267
+ org_region: org_region,
268
+ subsystem_type: subsystem_type,
269
+ document_type: document_type,
270
+ exact_date: exact_date
271
+ )
272
+
273
+ return api_result if api_result.failure?
274
+
275
+ archive_urls = api_result.data[:archive_urls]
276
+ return Result.success({ tenders: [], total_archives: 0, total_files: 0 }) if archive_urls.empty?
277
+
278
+ all_tenders = []
279
+ total_files = 0
280
+ processed_archives = 0
281
+ failed_archives = 0
282
+ end
283
+
284
+ log_info "Found #{archive_urls.size} archives to process (starting from #{start_from_archive})"
285
+
286
+ # Step 2: Process archives starting from specified position
287
+ (start_from_archive...archive_urls.size).each do |index|
288
+ archive_url = archive_urls[index]
289
+ log_info "Processing archive #{index + 1}/#{archive_urls.size}"
290
+
291
+ begin
292
+ archive_result = download_archive_data(archive_url)
293
+
294
+ if archive_result.failure?
295
+ # Check whether the download was blocked while automatic waiting is disabled
296
+ if archive_result.metadata[:error_type] == :blocked &&
297
+ !OxTenderAbstract.configuration.auto_wait_on_block
298
+ # Return the accumulated state so the search can be resumed later from this archive
299
+ resume_state = {
300
+ tenders: all_tenders,
301
+ total_files: total_files,
302
+ processed_archives: processed_archives,
303
+ failed_archives: failed_archives,
304
+ archive_urls: archive_urls,
305
+ next_archive_index: index
306
+ }
307
+
308
+ return Result.failure(
309
+ "Archive download blocked, can resume from archive #{index + 1}",
310
+ error_type: :blocked,
311
+ retry_after: 600,
312
+ resume_state: resume_state
313
+ )
314
+ else
315
+ log_error "Failed to download archive #{index + 1}: #{archive_result.error}"
316
+ failed_archives += 1
317
+ next
318
+ end
319
+ end
320
+
321
+ processed_archives += 1
322
+ files = archive_result.data[:files]
323
+ total_files += files.size
324
+
325
+ # Step 3: Parse XML files from archive
326
+ xml_files = files.select { |name, _| name.downcase.end_with?('.xml') }
327
+ log_debug "Found #{xml_files.size} XML files in archive #{index + 1}"
328
+
329
+ xml_files.each do |file_name, file_data|
330
+ parse_result = parse_xml_document(file_data[:content])
331
+
332
+ if parse_result.failure?
333
+ log_debug "Failed to parse #{file_name}: #{parse_result.error}"
334
+ next
335
+ end
336
+
337
+ next unless parse_result.data[:document_type] == :tender
338
+
339
+ tender_data = parse_result.data[:content]
340
+ next if tender_data[:reestr_number].nil? || tender_data[:reestr_number].empty?
341
+
342
+ # Extract attachments if requested; a failed extraction leaves the tender without attachment keys
343
+ if include_attachments
344
+ attachments_result = extract_attachments_from_xml(file_data[:content])
345
+ if attachments_result.success?
346
+ tender_data[:attachments] = attachments_result.data[:attachments]
347
+ tender_data[:attachments_count] = attachments_result.data[:total_count]
348
+ end
349
+ end
350
+
351
+ # Add metadata (source file, archive URL, timestamp and the archive's index in archive_urls)
352
+ tender_data[:source_file] = file_name
353
+ tender_data[:archive_url] = archive_url
354
+ tender_data[:processed_at] = Time.now
355
+ tender_data[:archive_index] = index
356
+
357
+ all_tenders << tender_data
358
+ rescue StandardError => e
359
+ log_error "Error processing file #{file_name}: #{e.message}"
360
+ # Continue with other files
361
+ end
362
+ rescue StandardError => e
363
+ log_error "Critical error processing archive #{index + 1}: #{e.message}"
364
+ failed_archives += 1
365
+ # Continue with other archives
366
+ end
367
+ end
368
+
369
+ log_info "Search completed. Processed: #{processed_archives}/#{archive_urls.size} archives, Failed: #{failed_archives}, Found #{all_tenders.size} tenders in #{total_files} files"
370
+
371
+ Result.success({
372
+ tenders: all_tenders,
373
+ total_archives: archive_urls.size,
374
+ processed_archives: processed_archives,
375
+ failed_archives: failed_archives,
376
+ total_files: total_files,
377
+ processed_at: Time.now,
378
+ completed: true
379
+ })
380
+ end
381
+
214
382
  private
215
383
 
216
384
  def validate_token!
@@ -5,7 +5,8 @@ require 'logger'
5
5
  module OxTenderAbstract
6
6
  # Configuration for the library
7
7
  class Configuration
8
- attr_accessor :token, :timeout_open, :timeout_read, :ssl_verify
8
+ attr_accessor :token, :timeout_open, :timeout_read, :ssl_verify,
9
+ :auto_wait_on_block, :block_wait_time, :max_wait_time
9
10
  attr_writer :wsdl_url, :logger
10
11
 
11
12
  def initialize
@@ -15,6 +16,9 @@ module OxTenderAbstract
15
16
  @ssl_verify = false
16
17
  @wsdl_url = nil # Will be set later
17
18
  @logger = nil # Will be set later
19
+ @auto_wait_on_block = true # Автоматически ждать при блокировке
20
+ @block_wait_time = 610 # Время ожидания при блокировке (10 мин + 10 сек)
21
+ @max_wait_time = 900 # Максимальное время ожидания (15 мин)
18
22
  end
19
23
 
20
24
  def wsdl_url
@@ -16,8 +16,8 @@ module OxTenderAbstract
16
16
  CONTRACT_EXECUTION_REPORT TENDER_NOTICE TENDER_DOCUMENTATION
17
17
  ].freeze
18
18
 
19
- # Electronic notification types
20
- ELECTRONIC_NOTIFICATION_TYPES = %w[
19
+ # Electronic notification types for 44-FZ
20
+ ELECTRONIC_NOTIFICATION_TYPES_44FZ = %w[
21
21
  epNotificationEF2020 epNotificationEF epNotificationOK2020
22
22
  epNotificationEP2020 epNotificationZK2020 epNotificationZP2020
23
23
  epNotificationISM2020 fcsNotificationEF fcsNotificationOK
@@ -25,10 +25,80 @@ module OxTenderAbstract
25
25
  fcsNotificationISM fcsPlacement fcsPlacementResult
26
26
  ].freeze
27
27
 
28
+ # Electronic notification types for 223-FZ
29
+ ELECTRONIC_NOTIFICATION_TYPES_223FZ = %w[
30
+ epNotification223 notification223 purchaseNotice223
31
+ purchaseNoticeEA223 purchaseNoticeZK223 purchaseNoticeZP223
32
+ purchaseNoticeOK223 purchaseNoticeIS223 contractNotice223
33
+ contractExecutionNotice223 purchasePlan223
34
+ ].freeze
35
+
36
+ # Electronic notification types for regional and municipal
37
+ ELECTRONIC_NOTIFICATION_TYPES_REGIONAL = %w[
38
+ epNotificationRP epNotificationRPGZ notificationRP
39
+ notificationRPGZ purchaseNoticeRP purchaseNoticeRPGZ
40
+ contractNoticeRP contractNoticeRPGZ
41
+ ].freeze
42
+
43
+ # All supported electronic notification types
44
+ ELECTRONIC_NOTIFICATION_TYPES = (
45
+ ELECTRONIC_NOTIFICATION_TYPES_44FZ +
46
+ ELECTRONIC_NOTIFICATION_TYPES_223FZ +
47
+ ELECTRONIC_NOTIFICATION_TYPES_REGIONAL
48
+ ).freeze
49
+
28
50
  # Default settings
29
51
  DEFAULT_SUBSYSTEM = 'PRIZ'
30
52
  DEFAULT_DOCUMENT_TYPE = 'epNotificationEF2020'
31
53
 
54
+ # Subsystem descriptions
55
+ SUBSYSTEM_DESCRIPTIONS = {
56
+ 'PRIZ' => '44-ФЗ - Основные закупки федеральных органов',
57
+ 'OD223' => '223-ФЗ - Закупки отдельных видов юридических лиц',
58
+ 'RD223' => '223-ФЗ - Реестр договоров',
59
+ 'RPEC' => 'Закупки субъектов РФ',
60
+ 'RPGZ' => 'Муниципальные закупки',
61
+ 'RGK' => 'Закупки государственных корпораций',
62
+ 'BTK' => 'Закупки бюджетных, автономных учреждений',
63
+ 'UR' => 'Закупки субъектов естественных монополий',
64
+ 'RJ' => 'Закупки для нужд судебной системы',
65
+ 'RDI' => 'Закупки для нужд дошкольных образовательных учреждений',
66
+ 'RPKLKP' => 'Закупки для нужд подведомственных Калининградской области',
67
+ 'RPNZ' => 'Закупки для нужд образовательных учреждений НЗО',
68
+ 'EA' => 'Электронные аукционы',
69
+ 'REC' => 'Реестр недобросовестных поставщиков',
70
+ 'RPP' => 'Реестр поставщиков',
71
+ 'RVP' => 'Реестр внутренних поставщиков',
72
+ 'RRK' => 'Реестр результатов контроля',
73
+ 'RRA' => 'Реестр результатов аудита',
74
+ 'RNP' => 'Реестр нарушений при проведении закупок',
75
+ 'RKPO' => 'Реестр контрольно-проверочных организаций'
76
+ }.freeze
77
+
78
+ # Get appropriate document types for subsystem
79
+ def self.document_types_for_subsystem(subsystem_type)
80
+ case subsystem_type
81
+ when 'PRIZ', 'RPEC', 'RPGZ', 'RGK', 'BTK', 'UR', 'RJ', 'RDI'
82
+ ELECTRONIC_NOTIFICATION_TYPES_44FZ
83
+ when 'OD223', 'RD223'
84
+ ELECTRONIC_NOTIFICATION_TYPES_223FZ + ELECTRONIC_NOTIFICATION_TYPES_44FZ
85
+ when /RP/
86
+ ELECTRONIC_NOTIFICATION_TYPES_REGIONAL + ELECTRONIC_NOTIFICATION_TYPES_44FZ
87
+ else
88
+ ELECTRONIC_NOTIFICATION_TYPES_44FZ
89
+ end
90
+ end
91
+
92
+ # Check if subsystem supports document type
93
+ def self.subsystem_supports_document_type?(subsystem_type, document_type)
94
+ document_types_for_subsystem(subsystem_type).include?(document_type)
95
+ end
96
+
97
+ # Get description for subsystem
98
+ def self.description_for_subsystem(subsystem_type)
99
+ SUBSYSTEM_DESCRIPTIONS[subsystem_type] || "Подсистема #{subsystem_type}"
100
+ end
101
+
32
102
  # API configuration
33
103
  API_CONFIG = {
34
104
  wsdl: 'https://int44.zakupki.gov.ru/eis-integration/services/getDocsIP?wsdl',
@@ -7,18 +7,30 @@ module OxTenderAbstract
7
7
  # Configuration related errors
8
8
  class ConfigurationError < Error; end
9
9
 
10
- # Network related errors
11
- class NetworkError < Error; end
10
+ # API related errors
11
+ class ApiError < Error; end
12
12
 
13
- # SOAP API related errors
14
- class SoapError < Error; end
13
+ # Archive processing errors
14
+ class ArchiveError < Error; end
15
15
 
16
- # XML parsing related errors
16
+ # XML parsing errors
17
17
  class ParseError < Error; end
18
18
 
19
- # Archive processing related errors
20
- class ArchiveError < Error; end
19
+ # Network related errors
20
+ class NetworkError < Error; end
21
+
22
+ # Archive download blocked error (10 minute block)
23
+ class ArchiveBlockedError < ArchiveError
24
+ attr_reader :blocked_until, :retry_after_seconds
25
+
26
+ def initialize(message = 'Archive download blocked', retry_after_seconds = 600)
27
+ super(message)
28
+ @retry_after_seconds = retry_after_seconds
29
+ @blocked_until = Time.now + retry_after_seconds
30
+ end
21
31
 
22
- # Authentication related errors
23
- class AuthenticationError < Error; end
32
+ def can_retry_at
33
+ @blocked_until
34
+ end
35
+ end
24
36
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module OxTenderAbstract
4
- VERSION = '0.9.2'
4
+ VERSION = '0.9.4'
5
5
  end
@@ -486,36 +486,57 @@ module OxTenderAbstract
486
486
  total_sum = nil
487
487
 
488
488
  begin
489
- # Find purchase objects nodes - use more defensive approach
489
+ # Find purchase objects nodes - including drug and non-drug objects
490
+ # Regular purchase objects
490
491
  purchase_object_nodes = doc.xpath(
491
492
  '//ns5:purchaseObjectsInfo//ns4:purchaseObject | //purchaseObjectsInfo//purchaseObject', namespaces
492
493
  )
493
494
 
495
+ # Drug purchase objects (лекарственные препараты)
496
+ drug_object_nodes = doc.xpath(
497
+ '//ns5:drugPurchaseObjectsInfo//ns4:drugPurchaseObjectInfo | //drugPurchaseObjectsInfo//drugPurchaseObjectInfo', namespaces
498
+ )
499
+
500
+ # Process regular purchase objects
494
501
  purchase_objects = purchase_object_nodes.map do |object_node|
495
502
  extract_purchase_object_data(object_node, namespaces)
496
503
  end.compact
497
504
 
498
- # Extract total sum from purchaseObjectsInfo
505
+ # Process drug purchase objects
506
+ drug_objects = drug_object_nodes.map do |drug_node|
507
+ extract_drug_purchase_object_data(drug_node, namespaces)
508
+ end.compact
509
+
510
+ # Combine all objects
511
+ all_objects = purchase_objects + drug_objects
512
+
513
+ # Extract total sum from various sources
499
514
  total_sum = extract_price_from_text(find_text_with_namespaces(doc, [
500
515
  '//ns5:purchaseObjectsInfo//ns4:totalSum',
501
516
  '//purchaseObjectsInfo//totalSum',
502
517
  '//ns5:notDrugPurchaseObjectsInfo/ns4:totalSum',
503
- '//notDrugPurchaseObjectsInfo/totalSum'
518
+ '//notDrugPurchaseObjectsInfo/totalSum',
519
+ '//ns5:drugPurchaseObjectsInfo/ns4:total',
520
+ '//drugPurchaseObjectsInfo/total'
504
521
  ], namespaces))
505
522
 
506
523
  # Extract quantity undefined flag
507
524
  quantity_undefined = find_text_with_namespaces(doc, [
508
525
  '//ns5:purchaseObjectsInfo//ns5:quantityUndefined',
509
- '//purchaseObjectsInfo//quantityUndefined'
526
+ '//purchaseObjectsInfo//quantityUndefined',
527
+ '//ns5:drugPurchaseObjectsInfo//ns5:quantityUndefined',
528
+ '//drugPurchaseObjectsInfo//quantityUndefined'
510
529
  ], namespaces) == 'true'
511
530
 
512
- return {} if purchase_objects.empty? && total_sum.nil?
531
+ return {} if all_objects.empty? && total_sum.nil?
513
532
 
514
533
  {
515
- objects: purchase_objects,
516
- objects_count: purchase_objects.size,
534
+ objects: all_objects,
535
+ objects_count: all_objects.size,
517
536
  total_sum: total_sum,
518
- quantity_undefined: quantity_undefined
537
+ quantity_undefined: quantity_undefined,
538
+ drug_objects_count: drug_objects.size,
539
+ regular_objects_count: purchase_objects.size
519
540
  }.compact
520
541
  rescue StandardError => e
521
542
  log_debug "Error extracting purchase objects: #{e.message}"
@@ -632,6 +653,106 @@ module OxTenderAbstract
632
653
  object_data.compact
633
654
  end
634
655
 
656
+ def extract_drug_purchase_object_data(drug_node, namespaces)
657
+ # Extract data from drug purchase object info
658
+ drug_data = {
659
+ sid: extract_text_from_node(drug_node, './/ns4:sid | .//sid'),
660
+ external_sid: extract_text_from_node(drug_node, './/ns4:externalSid | .//externalSid'),
661
+ name: extract_text_from_node(drug_node, './/ns4:name | .//name'),
662
+ price: extract_price_from_text(extract_text_from_node(drug_node, './/ns4:price | .//price')),
663
+ quantity: extract_text_from_node(drug_node, './/ns4:quantity/ns4:value | .//quantity/value')&.to_i,
664
+ sum: extract_price_from_text(extract_text_from_node(drug_node, './/ns4:sum | .//sum')),
665
+ type: 'drug', # Mark as drug object
666
+ hierarchy_type: extract_text_from_node(drug_node, './/ns4:hierarchyType | .//hierarchyType')
667
+ }
668
+
669
+ # Extract INN (International Nonproprietary Name) for drugs
670
+ inn_node = drug_node.at_xpath('.//ns4:INN | .//INN', namespaces)
671
+ if inn_node
672
+ drug_data[:inn] = {
673
+ code: extract_text_from_node(inn_node, './/ns2:code | .//code'),
674
+ name: extract_text_from_node(inn_node, './/ns2:name | .//name')
675
+ }
676
+ end
677
+
678
+ # Extract dosage form information
679
+ dosage_form_node = drug_node.at_xpath('.//ns4:dosageForm | .//dosageForm', namespaces)
680
+ if dosage_form_node
681
+ drug_data[:dosage_form] = {
682
+ code: extract_text_from_node(dosage_form_node, './/ns2:code | .//code'),
683
+ name: extract_text_from_node(dosage_form_node, './/ns2:name | .//name')
684
+ }
685
+ end
686
+
687
+ # OKPD2 information for drugs
688
+ okpd2_node = drug_node.at_xpath('.//ns4:OKPD2 | .//OKPD2', namespaces)
689
+ if okpd2_node
690
+ drug_data[:okpd2] = {
691
+ code: extract_text_from_node(okpd2_node, './/ns2:OKPDCode | .//OKPDCode'),
692
+ name: extract_text_from_node(okpd2_node, './/ns2:OKPDName | .//OKPDName')
693
+ }
694
+ end
695
+
696
+ # OKEI information (units of measurement)
697
+ okei_node = drug_node.at_xpath('.//ns4:OKEI | .//OKEI', namespaces)
698
+ if okei_node
699
+ drug_data[:okei] = {
700
+ code: extract_text_from_node(okei_node, './/ns2:code | .//code'),
701
+ national_code: extract_text_from_node(okei_node, './/ns2:nationalCode | .//nationalCode'),
702
+ name: extract_text_from_node(okei_node, './/ns2:name | .//name')
703
+ }
704
+ end
705
+
706
+ # Extract characteristics for drugs
707
+ characteristics_nodes = drug_node.xpath(
708
+ './/ns4:characteristics//ns4:characteristicsUsingReferenceInfo | .//characteristics//characteristicsUsingReferenceInfo', namespaces
709
+ )
710
+ characteristics_nodes += drug_node.xpath(
711
+ './/ns4:characteristics//ns4:characteristicsUsingTextForm | .//characteristics//characteristicsUsingTextForm', namespaces
712
+ )
713
+
714
+ if characteristics_nodes.any?
715
+ characteristics_details = characteristics_nodes.map do |char_node|
716
+ char_data = {
717
+ name: extract_text_from_node(char_node, './/ns4:name | .//name'),
718
+ type: extract_text_from_node(char_node, './/ns4:type | .//type')
719
+ }
720
+
721
+ # Extract values from text form characteristics
722
+ values_nodes = char_node.xpath('.//ns4:values/ns4:value | .//values/value', namespaces)
723
+ if values_nodes.any?
724
+ char_data[:values] = values_nodes.map do |value_node|
725
+ extract_text_from_node(value_node, './/ns4:qualityDescription | .//qualityDescription') ||
726
+ extract_text_from_node(value_node, './/ns4:textValue | .//textValue')
727
+ end.compact
728
+ end
729
+
730
+ char_data
731
+ end
732
+
733
+ drug_data[:characteristics] = {
734
+ count: characteristics_nodes.size,
735
+ details: characteristics_details
736
+ }
737
+ end
738
+
739
+ # Determine the product name
740
+ product_name = if drug_data[:name] && !drug_data[:name].empty?
741
+ drug_data[:name]
742
+ elsif drug_data[:inn] && drug_data[:inn][:name] && !drug_data[:inn][:name].empty?
743
+ drug_data[:inn][:name]
744
+ elsif drug_data[:okpd2] && drug_data[:okpd2][:name] && !drug_data[:okpd2][:name].empty?
745
+ drug_data[:okpd2][:name]
746
+ else
747
+ 'Unknown drug'
748
+ end
749
+
750
+ drug_data[:product_name] = product_name
751
+ drug_data[:name_type] = 'drug_name'
752
+
753
+ drug_data.compact
754
+ end
755
+
635
756
  private
636
757
 
637
758
  def determine_name_type(name)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ox-tender-abstract
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.2
4
+ version: 0.9.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - smolev