ox-tender-abstract 0.9.1 → 0.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec_status +18 -18
- data/CHANGELOG.md +11 -0
- data/README.md +35 -0
- data/lib/ox-tender-abstract.rb +154 -21
- data/lib/oxtenderabstract/archive_processor.rb +192 -76
- data/lib/oxtenderabstract/client.rb +170 -20
- data/lib/oxtenderabstract/configuration.rb +5 -1
- data/lib/oxtenderabstract/document_types.rb +72 -2
- data/lib/oxtenderabstract/errors.rb +21 -9
- data/lib/oxtenderabstract/version.rb +1 -1
- data/lib/oxtenderabstract/xml_parser.rb +164 -23
- metadata +1 -1
@@ -1,38 +1,40 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
7
|
-
require
|
8
|
-
require
|
3
|
+
require 'net/http'
|
4
|
+
require 'uri'
|
5
|
+
require 'openssl'
|
6
|
+
require 'zlib'
|
7
|
+
require 'stringio'
|
8
|
+
require 'zip'
|
9
9
|
|
10
10
|
module OxTenderAbstract
|
11
11
|
# Archive processor for downloading and extracting archive files
|
12
12
|
class ArchiveProcessor
|
13
13
|
include ContextualLogger
|
14
14
|
|
15
|
-
MAX_FILE_SIZE_BYTES = 100 * 1024 * 1024
|
16
|
-
|
15
|
+
MAX_FILE_SIZE_BYTES = 100 * 1024 * 1024 # 100 MB in bytes
|
16
|
+
MAX_RETRY_ATTEMPTS = 3
|
17
|
+
RETRY_DELAY_SECONDS = 2
|
18
|
+
|
17
19
|
def initialize
|
18
20
|
# Archive processor initialization
|
19
21
|
end
|
20
22
|
|
21
23
|
# Download and extract archive data
|
22
24
|
def download_and_extract(archive_url)
|
23
|
-
return Result.failure(
|
25
|
+
return Result.failure('Empty archive URL') if archive_url.nil? || archive_url.empty?
|
24
26
|
|
25
27
|
begin
|
26
|
-
# Download archive to memory
|
27
|
-
download_result =
|
28
|
+
# Download archive to memory with retry logic
|
29
|
+
download_result = download_with_retry(archive_url)
|
28
30
|
return download_result if download_result.failure?
|
29
31
|
|
30
32
|
content = download_result.data[:content]
|
31
33
|
|
32
34
|
# Determine archive format by first bytes
|
33
|
-
first_bytes = content[0..1].
|
35
|
+
first_bytes = content[0..1].unpack1('H*')
|
34
36
|
|
35
|
-
if first_bytes ==
|
37
|
+
if first_bytes == '1f8b'
|
36
38
|
# This is GZIP archive - decompress GZIP, then ZIP
|
37
39
|
gunzip_result = decompress_gzip(content)
|
38
40
|
return gunzip_result if gunzip_result.failure?
|
@@ -40,38 +42,73 @@ module OxTenderAbstract
|
|
40
42
|
zip_result = extract_zip_from_memory(gunzip_result.data[:content])
|
41
43
|
|
42
44
|
Result.success({
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
elsif content[0..1] ==
|
45
|
+
files: zip_result,
|
46
|
+
total_size: download_result.data[:size],
|
47
|
+
compressed_size: gunzip_result.data[:compressed_size],
|
48
|
+
file_count: zip_result.size
|
49
|
+
})
|
50
|
+
elsif content[0..1] == 'PK'
|
49
51
|
# This is already ZIP archive - parse directly
|
50
52
|
zip_result = extract_zip_from_memory(content)
|
51
53
|
|
52
54
|
Result.success({
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
55
|
+
files: zip_result,
|
56
|
+
total_size: download_result.data[:size],
|
57
|
+
compressed_size: nil,
|
58
|
+
file_count: zip_result.size
|
59
|
+
})
|
58
60
|
else
|
59
|
-
|
61
|
+
# Log first bytes for debugging
|
62
|
+
log_error "Unknown archive format. First 10 bytes: #{content[0..9].unpack1('H*')}"
|
63
|
+
Result.failure('Unknown archive format (not GZIP and not ZIP)')
|
60
64
|
end
|
61
|
-
rescue => e
|
65
|
+
rescue StandardError => e
|
66
|
+
log_error "Archive processing error: #{e.message}"
|
67
|
+
log_error e.backtrace.first(3).join("\n") if e.backtrace
|
62
68
|
Result.failure("Archive processing error: #{e.message}")
|
63
69
|
end
|
64
70
|
end
|
65
71
|
|
66
72
|
private
|
67
73
|
|
74
|
+
def download_with_retry(archive_url)
|
75
|
+
attempt = 1
|
76
|
+
last_error = nil
|
77
|
+
|
78
|
+
while attempt <= MAX_RETRY_ATTEMPTS
|
79
|
+
begin
|
80
|
+
log_info "Download attempt #{attempt}/#{MAX_RETRY_ATTEMPTS} for archive"
|
81
|
+
result = download_to_memory(archive_url)
|
82
|
+
|
83
|
+
if result.success?
|
84
|
+
log_info "Download successful on attempt #{attempt}"
|
85
|
+
return result
|
86
|
+
else
|
87
|
+
last_error = result.error
|
88
|
+
log_warn "Download attempt #{attempt} failed: #{last_error}"
|
89
|
+
end
|
90
|
+
rescue StandardError => e
|
91
|
+
last_error = e.message
|
92
|
+
log_error "Download attempt #{attempt} exception: #{last_error}"
|
93
|
+
end
|
94
|
+
|
95
|
+
if attempt < MAX_RETRY_ATTEMPTS
|
96
|
+
sleep_time = RETRY_DELAY_SECONDS * attempt
|
97
|
+
log_info "Waiting #{sleep_time} seconds before retry..."
|
98
|
+
sleep(sleep_time)
|
99
|
+
end
|
100
|
+
|
101
|
+
attempt += 1
|
102
|
+
end
|
103
|
+
|
104
|
+
Result.failure("Download failed after #{MAX_RETRY_ATTEMPTS} attempts. Last error: #{last_error}")
|
105
|
+
end
|
106
|
+
|
68
107
|
def download_to_memory(url)
|
69
108
|
begin
|
70
109
|
uri = URI.parse(url)
|
71
110
|
# Check if URI is valid HTTP/HTTPS
|
72
|
-
unless uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)
|
73
|
-
return Result.failure("Invalid URL: not HTTP/HTTPS")
|
74
|
-
end
|
111
|
+
return Result.failure('Invalid URL: not HTTP/HTTPS') unless uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)
|
75
112
|
rescue URI::InvalidURIError => e
|
76
113
|
return Result.failure("Invalid URL: #{e.message}")
|
77
114
|
end
|
@@ -80,15 +117,45 @@ module OxTenderAbstract
|
|
80
117
|
http = create_http_client(uri)
|
81
118
|
|
82
119
|
request = Net::HTTP::Get.new(uri.request_uri)
|
83
|
-
request[
|
84
|
-
request[
|
120
|
+
request['User-Agent'] = "OxTenderAbstract/#{OxTenderAbstract::VERSION}"
|
121
|
+
request['individualPerson_token'] = OxTenderAbstract.configuration.token
|
85
122
|
|
86
|
-
log_debug "Downloading archive from: #{url}"
|
123
|
+
log_debug "Downloading archive from: #{url[0..100]}..."
|
87
124
|
|
88
125
|
response = http.request(request)
|
89
126
|
|
127
|
+
# Enhanced error handling with response details
|
90
128
|
unless response.is_a?(Net::HTTPSuccess)
|
91
|
-
|
129
|
+
error_msg = "HTTP error: #{response.code} #{response.message}"
|
130
|
+
if response.body && !response.body.empty?
|
131
|
+
# Log first part of response body for debugging
|
132
|
+
body_preview = response.body[0..500]
|
133
|
+
log_error "Response body preview: #{body_preview}"
|
134
|
+
error_msg += ". Response: #{body_preview[0..100]}"
|
135
|
+
end
|
136
|
+
return Result.failure(error_msg)
|
137
|
+
end
|
138
|
+
|
139
|
+
# Check for download blocking message in successful response
|
140
|
+
if response.body&.include?('Скачивание архива по данной ссылке заблокировано')
|
141
|
+
if OxTenderAbstract.configuration.auto_wait_on_block
|
142
|
+
wait_time = OxTenderAbstract.configuration.block_wait_time
|
143
|
+
log_error "Archive download blocked. Auto-waiting for #{wait_time} seconds..."
|
144
|
+
|
145
|
+
# Показываем прогресс ожидания
|
146
|
+
show_wait_progress(wait_time)
|
147
|
+
|
148
|
+
log_info 'Wait completed, retrying download...'
|
149
|
+
# Рекурсивно повторяем попытку после ожидания
|
150
|
+
return download_to_memory(url)
|
151
|
+
else
|
152
|
+
log_error 'Archive download blocked for 10 minutes'
|
153
|
+
return Result.failure(
|
154
|
+
'Archive download blocked for 10 minutes',
|
155
|
+
error_type: :blocked,
|
156
|
+
retry_after: 600
|
157
|
+
)
|
158
|
+
end
|
92
159
|
end
|
93
160
|
|
94
161
|
content = response.body
|
@@ -98,78 +165,127 @@ module OxTenderAbstract
|
|
98
165
|
return Result.failure("Archive too large: #{size} bytes (max: #{MAX_FILE_SIZE_BYTES})")
|
99
166
|
end
|
100
167
|
|
101
|
-
|
168
|
+
return Result.failure('Empty archive downloaded') if size == 0
|
169
|
+
|
170
|
+
log_debug "Downloaded archive: #{size} bytes, content-type: #{response['content-type']}"
|
102
171
|
|
103
172
|
Result.success({
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
173
|
+
content: content,
|
174
|
+
size: size,
|
175
|
+
content_type: response['content-type']
|
176
|
+
})
|
108
177
|
rescue SocketError, Timeout::Error => e
|
109
178
|
Result.failure("Network error: #{e.message}")
|
110
|
-
rescue => e
|
179
|
+
rescue StandardError => e
|
180
|
+
log_error "Download error details: #{e.class} - #{e.message}"
|
111
181
|
Result.failure("Download error: #{e.message}")
|
112
182
|
end
|
113
183
|
end
|
114
184
|
|
115
185
|
def create_http_client(uri)
|
116
186
|
http = Net::HTTP.new(uri.host, uri.port)
|
117
|
-
http.use_ssl = uri.scheme ==
|
187
|
+
http.use_ssl = uri.scheme == 'https'
|
118
188
|
http.verify_mode = OxTenderAbstract.configuration.ssl_verify ? OpenSSL::SSL::VERIFY_PEER : OpenSSL::SSL::VERIFY_NONE
|
119
189
|
http.open_timeout = OxTenderAbstract.configuration.timeout_open
|
120
190
|
http.read_timeout = OxTenderAbstract.configuration.timeout_read
|
191
|
+
|
192
|
+
# Add debug logging for HTTP client configuration
|
193
|
+
log_debug "HTTP client config: SSL=#{http.use_ssl?}, verify=#{http.verify_mode}, open_timeout=#{http.open_timeout}, read_timeout=#{http.read_timeout}"
|
194
|
+
|
121
195
|
http
|
122
196
|
end
|
123
197
|
|
124
198
|
def decompress_gzip(gzip_content)
|
125
|
-
|
126
|
-
log_debug "Decompressing GZIP archive"
|
127
|
-
|
128
|
-
gz = Zlib::GzipReader.new(StringIO.new(gzip_content))
|
129
|
-
decompressed_content = gz.read
|
130
|
-
gz.close
|
199
|
+
log_debug 'Decompressing GZIP archive'
|
131
200
|
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
201
|
+
gz = Zlib::GzipReader.new(StringIO.new(gzip_content))
|
202
|
+
decompressed_content = gz.read
|
203
|
+
gz.close
|
204
|
+
|
205
|
+
log_debug "GZIP decompression: #{gzip_content.bytesize} -> #{decompressed_content.bytesize} bytes"
|
206
|
+
|
207
|
+
Result.success({
|
208
|
+
content: decompressed_content,
|
209
|
+
compressed_size: gzip_content.bytesize,
|
210
|
+
decompressed_size: decompressed_content.bytesize
|
211
|
+
})
|
212
|
+
rescue Zlib::GzipFile::Error => e
|
213
|
+
log_error "GZIP decompression error: #{e.message}"
|
214
|
+
Result.failure("GZIP decompression error: #{e.message}")
|
215
|
+
rescue StandardError => e
|
216
|
+
log_error "Decompression error: #{e.message}"
|
217
|
+
Result.failure("Decompression error: #{e.message}")
|
142
218
|
end
|
143
219
|
|
144
220
|
def extract_zip_from_memory(zip_content)
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
221
|
+
log_debug "Extracting ZIP archive from memory (#{zip_content.bytesize} bytes)"
|
222
|
+
|
223
|
+
files = {}
|
224
|
+
zip_io = StringIO.new(zip_content)
|
225
|
+
|
226
|
+
Zip::File.open_buffer(zip_io) do |zip_file|
|
227
|
+
zip_file.each do |entry|
|
228
|
+
next if entry.directory?
|
229
|
+
|
230
|
+
log_debug "Extracting file: #{entry.name} (#{entry.size} bytes)"
|
231
|
+
|
232
|
+
begin
|
233
|
+
content = entry.get_input_stream.read
|
234
|
+
|
157
235
|
files[entry.name] = {
|
158
|
-
content:
|
236
|
+
content: content,
|
159
237
|
size: entry.size,
|
160
238
|
compressed_size: entry.compressed_size,
|
161
239
|
crc: entry.crc
|
162
240
|
}
|
241
|
+
rescue StandardError => e
|
242
|
+
log_error "Error extracting file #{entry.name}: #{e.message}"
|
243
|
+
# Continue with other files instead of failing completely
|
163
244
|
end
|
164
245
|
end
|
246
|
+
end
|
247
|
+
|
248
|
+
log_debug "Extracted #{files.size} files from ZIP archive"
|
249
|
+
files
|
250
|
+
rescue Zip::Error => e
|
251
|
+
log_error "ZIP extraction error: #{e.message}"
|
252
|
+
raise ArchiveError, "ZIP extraction error: #{e.message}"
|
253
|
+
rescue StandardError => e
|
254
|
+
log_error "Archive extraction error: #{e.message}"
|
255
|
+
log_error e.backtrace.first(3).join("\n") if e.backtrace
|
256
|
+
raise ArchiveError, "Archive extraction error: #{e.message}"
|
257
|
+
end
|
258
|
+
|
259
|
+
# Show wait progress during API block
|
260
|
+
def show_wait_progress(total_seconds)
|
261
|
+
return if total_seconds <= 0
|
262
|
+
|
263
|
+
log_info "Waiting #{total_seconds} seconds for API block to expire..."
|
264
|
+
|
265
|
+
# Показываем прогресс каждые 30 секунд для больших интервалов
|
266
|
+
if total_seconds > 60
|
267
|
+
intervals = [30, 60, 120, 180, 300].select { |i| i < total_seconds }
|
165
268
|
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
269
|
+
intervals.each do |interval|
|
270
|
+
sleep(interval)
|
271
|
+
remaining = total_seconds - interval
|
272
|
+
total_seconds = remaining
|
273
|
+
|
274
|
+
if remaining > 60
|
275
|
+
log_info "Still waiting... #{remaining} seconds remaining (#{(remaining / 60.0).round(1)} minutes)"
|
276
|
+
else
|
277
|
+
log_info "Still waiting... #{remaining} seconds remaining"
|
278
|
+
end
|
279
|
+
end
|
280
|
+
|
281
|
+
# Ждем оставшееся время
|
282
|
+
sleep(total_seconds) if total_seconds > 0
|
283
|
+
else
|
284
|
+
# Для коротких интервалов просто ждем
|
285
|
+
sleep(total_seconds)
|
172
286
|
end
|
287
|
+
|
288
|
+
log_info 'Wait period completed!'
|
173
289
|
end
|
174
290
|
end
|
175
|
-
end
|
291
|
+
end
|
@@ -77,7 +77,7 @@ module OxTenderAbstract
|
|
77
77
|
# Search tenders with full workflow: API -> Archive -> Parse
|
78
78
|
def search_tenders(org_region:, exact_date:, subsystem_type: DocumentTypes::DEFAULT_SUBSYSTEM,
|
79
79
|
document_type: DocumentTypes::DEFAULT_DOCUMENT_TYPE)
|
80
|
-
log_info "Starting tender search for region #{org_region}, date #{exact_date}"
|
80
|
+
log_info "Starting tender search for region #{org_region}, date #{exact_date}, subsystem: #{subsystem_type}, type: #{document_type}"
|
81
81
|
|
82
82
|
# Step 1: Get archive URLs from API
|
83
83
|
api_result = get_docs_by_region(
|
@@ -94,44 +94,69 @@ module OxTenderAbstract
|
|
94
94
|
|
95
95
|
log_info "Found #{archive_urls.size} archives to process"
|
96
96
|
|
97
|
-
# Step 2: Process each archive
|
97
|
+
# Step 2: Process each archive with error resilience
|
98
98
|
all_tenders = []
|
99
99
|
total_files = 0
|
100
|
+
processed_archives = 0
|
101
|
+
failed_archives = 0
|
100
102
|
|
101
103
|
archive_urls.each_with_index do |archive_url, index|
|
102
104
|
log_info "Processing archive #{index + 1}/#{archive_urls.size}"
|
103
105
|
|
104
|
-
|
105
|
-
|
106
|
+
begin
|
107
|
+
archive_result = download_archive_data(archive_url)
|
106
108
|
|
107
|
-
|
108
|
-
|
109
|
+
if archive_result.failure?
|
110
|
+
log_error "Failed to download archive #{index + 1}: #{archive_result.error}"
|
111
|
+
failed_archives += 1
|
112
|
+
next
|
113
|
+
end
|
109
114
|
|
110
|
-
|
111
|
-
|
115
|
+
processed_archives += 1
|
116
|
+
files = archive_result.data[:files]
|
117
|
+
total_files += files.size
|
112
118
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
next unless parse_result.data[:document_type] == :tender
|
119
|
+
# Step 3: Parse XML files from archive
|
120
|
+
xml_files = files.select { |name, _| name.downcase.end_with?('.xml') }
|
121
|
+
log_debug "Found #{xml_files.size} XML files in archive #{index + 1}"
|
117
122
|
|
118
|
-
|
119
|
-
|
123
|
+
xml_files.each do |file_name, file_data|
|
124
|
+
parse_result = parse_xml_document(file_data[:content])
|
120
125
|
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
126
|
+
if parse_result.failure?
|
127
|
+
log_debug "Failed to parse #{file_name}: #{parse_result.error}"
|
128
|
+
next
|
129
|
+
end
|
125
130
|
|
126
|
-
|
131
|
+
next unless parse_result.data[:document_type] == :tender
|
132
|
+
|
133
|
+
tender_data = parse_result.data[:content]
|
134
|
+
next if tender_data[:reestr_number].nil? || tender_data[:reestr_number].empty?
|
135
|
+
|
136
|
+
# Add metadata
|
137
|
+
tender_data[:source_file] = file_name
|
138
|
+
tender_data[:archive_url] = archive_url
|
139
|
+
tender_data[:processed_at] = Time.now
|
140
|
+
|
141
|
+
all_tenders << tender_data
|
142
|
+
rescue StandardError => e
|
143
|
+
log_error "Error processing file #{file_name}: #{e.message}"
|
144
|
+
# Continue with other files
|
145
|
+
end
|
146
|
+
rescue StandardError => e
|
147
|
+
log_error "Critical error processing archive #{index + 1}: #{e.message}"
|
148
|
+
failed_archives += 1
|
149
|
+
# Continue with other archives
|
127
150
|
end
|
128
151
|
end
|
129
152
|
|
130
|
-
log_info "Search completed. Found #{all_tenders.size} tenders in #{total_files} files"
|
153
|
+
log_info "Search completed. Processed: #{processed_archives}/#{archive_urls.size} archives, Failed: #{failed_archives}, Found #{all_tenders.size} tenders in #{total_files} files"
|
131
154
|
|
132
155
|
Result.success({
|
133
156
|
tenders: all_tenders,
|
134
157
|
total_archives: archive_urls.size,
|
158
|
+
processed_archives: processed_archives,
|
159
|
+
failed_archives: failed_archives,
|
135
160
|
total_files: total_files,
|
136
161
|
processed_at: Time.now
|
137
162
|
})
|
@@ -211,6 +236,131 @@ module OxTenderAbstract
|
|
211
236
|
})
|
212
237
|
end
|
213
238
|
|
239
|
+
# Search tenders with automatic resume capability
|
240
|
+
# Позволяет продолжить загрузку с места паузы при блокировках API
|
241
|
+
def search_tenders_with_resume(org_region:, exact_date:, subsystem_type: DocumentTypes::DEFAULT_SUBSYSTEM,
|
242
|
+
document_type: DocumentTypes::DEFAULT_DOCUMENT_TYPE,
|
243
|
+
start_from_archive: 0, resume_state: nil)
|
244
|
+
log_info "Starting tender search with resume capability for region #{org_region}, date #{exact_date}"
|
245
|
+
log_info "Starting from archive #{start_from_archive}" if start_from_archive > 0
|
246
|
+
|
247
|
+
# Восстанавливаем состояние если есть
|
248
|
+
if resume_state
|
249
|
+
log_info "Resuming from previous state: #{resume_state[:processed_archives]} archives processed"
|
250
|
+
all_tenders = resume_state[:tenders] || []
|
251
|
+
total_files = resume_state[:total_files] || 0
|
252
|
+
processed_archives = resume_state[:processed_archives] || 0
|
253
|
+
failed_archives = resume_state[:failed_archives] || 0
|
254
|
+
archive_urls = resume_state[:archive_urls]
|
255
|
+
else
|
256
|
+
# Step 1: Get archive URLs from API
|
257
|
+
api_result = get_docs_by_region(
|
258
|
+
org_region: org_region,
|
259
|
+
subsystem_type: subsystem_type,
|
260
|
+
document_type: document_type,
|
261
|
+
exact_date: exact_date
|
262
|
+
)
|
263
|
+
|
264
|
+
return api_result if api_result.failure?
|
265
|
+
|
266
|
+
archive_urls = api_result.data[:archive_urls]
|
267
|
+
return Result.success({ tenders: [], total_archives: 0, total_files: 0 }) if archive_urls.empty?
|
268
|
+
|
269
|
+
all_tenders = []
|
270
|
+
total_files = 0
|
271
|
+
processed_archives = 0
|
272
|
+
failed_archives = 0
|
273
|
+
end
|
274
|
+
|
275
|
+
log_info "Found #{archive_urls.size} archives to process (starting from #{start_from_archive})"
|
276
|
+
|
277
|
+
# Step 2: Process archives starting from specified position
|
278
|
+
(start_from_archive...archive_urls.size).each do |index|
|
279
|
+
archive_url = archive_urls[index]
|
280
|
+
log_info "Processing archive #{index + 1}/#{archive_urls.size}"
|
281
|
+
|
282
|
+
begin
|
283
|
+
archive_result = download_archive_data(archive_url)
|
284
|
+
|
285
|
+
if archive_result.failure?
|
286
|
+
# Проверяем, была ли блокировка с автоматическим ожиданием
|
287
|
+
if archive_result.metadata[:error_type] == :blocked &&
|
288
|
+
!OxTenderAbstract.configuration.auto_wait_on_block
|
289
|
+
# Возвращаем состояние для возможности продолжения
|
290
|
+
resume_state = {
|
291
|
+
tenders: all_tenders,
|
292
|
+
total_files: total_files,
|
293
|
+
processed_archives: processed_archives,
|
294
|
+
failed_archives: failed_archives,
|
295
|
+
archive_urls: archive_urls,
|
296
|
+
next_archive_index: index
|
297
|
+
}
|
298
|
+
|
299
|
+
return Result.failure(
|
300
|
+
"Archive download blocked, can resume from archive #{index + 1}",
|
301
|
+
error_type: :blocked,
|
302
|
+
retry_after: 600,
|
303
|
+
resume_state: resume_state
|
304
|
+
)
|
305
|
+
else
|
306
|
+
log_error "Failed to download archive #{index + 1}: #{archive_result.error}"
|
307
|
+
failed_archives += 1
|
308
|
+
next
|
309
|
+
end
|
310
|
+
end
|
311
|
+
|
312
|
+
processed_archives += 1
|
313
|
+
files = archive_result.data[:files]
|
314
|
+
total_files += files.size
|
315
|
+
|
316
|
+
# Step 3: Parse XML files from archive
|
317
|
+
xml_files = files.select { |name, _| name.downcase.end_with?('.xml') }
|
318
|
+
log_debug "Found #{xml_files.size} XML files in archive #{index + 1}"
|
319
|
+
|
320
|
+
xml_files.each do |file_name, file_data|
|
321
|
+
parse_result = parse_xml_document(file_data[:content])
|
322
|
+
|
323
|
+
if parse_result.failure?
|
324
|
+
log_debug "Failed to parse #{file_name}: #{parse_result.error}"
|
325
|
+
next
|
326
|
+
end
|
327
|
+
|
328
|
+
next unless parse_result.data[:document_type] == :tender
|
329
|
+
|
330
|
+
tender_data = parse_result.data[:content]
|
331
|
+
next if tender_data[:reestr_number].nil? || tender_data[:reestr_number].empty?
|
332
|
+
|
333
|
+
# Add metadata
|
334
|
+
tender_data[:source_file] = file_name
|
335
|
+
tender_data[:archive_url] = archive_url
|
336
|
+
tender_data[:processed_at] = Time.now
|
337
|
+
tender_data[:archive_index] = index
|
338
|
+
|
339
|
+
all_tenders << tender_data
|
340
|
+
rescue StandardError => e
|
341
|
+
log_error "Error processing file #{file_name}: #{e.message}"
|
342
|
+
# Continue with other files
|
343
|
+
end
|
344
|
+
rescue StandardError => e
|
345
|
+
log_error "Critical error processing archive #{index + 1}: #{e.message}"
|
346
|
+
failed_archives += 1
|
347
|
+
# Continue with other archives
|
348
|
+
end
|
349
|
+
end
|
350
|
+
|
351
|
+
log_info "Search completed. Processed: #{processed_archives}/#{archive_urls.size} archives, Failed: #{failed_archives}, Found #{all_tenders.size} tenders in #{total_files} files"
|
352
|
+
|
353
|
+
Result.success({
|
354
|
+
tenders: all_tenders,
|
355
|
+
total_archives: archive_urls.size,
|
356
|
+
processed_archives: processed_archives,
|
357
|
+
failed_archives: failed_archives,
|
358
|
+
total_files: total_files,
|
359
|
+
processed_at: Time.now,
|
360
|
+
completed: true
|
361
|
+
})
|
362
|
+
end
|
363
|
+
|
214
364
|
private
|
215
365
|
|
216
366
|
def validate_token!
|
@@ -5,7 +5,8 @@ require 'logger'
|
|
5
5
|
module OxTenderAbstract
|
6
6
|
# Configuration for the library
|
7
7
|
class Configuration
|
8
|
-
attr_accessor :token, :timeout_open, :timeout_read, :ssl_verify
|
8
|
+
attr_accessor :token, :timeout_open, :timeout_read, :ssl_verify,
|
9
|
+
:auto_wait_on_block, :block_wait_time, :max_wait_time
|
9
10
|
attr_writer :wsdl_url, :logger
|
10
11
|
|
11
12
|
def initialize
|
@@ -15,6 +16,9 @@ module OxTenderAbstract
|
|
15
16
|
@ssl_verify = false
|
16
17
|
@wsdl_url = nil # Will be set later
|
17
18
|
@logger = nil # Will be set later
|
19
|
+
@auto_wait_on_block = true # Автоматически ждать при блокировке
|
20
|
+
@block_wait_time = 610 # Время ожидания при блокировке (10 мин + 10 сек)
|
21
|
+
@max_wait_time = 900 # Максимальное время ожидания (15 мин)
|
18
22
|
end
|
19
23
|
|
20
24
|
def wsdl_url
|