ox-tender-abstract 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.cursor/rules/010-project-structure.mdc +11 -0
- data/.cursor/rules/998-clean-code.mdc +62 -0
- data/.cursor/rules/999-mdc-format.mdc +132 -0
- data/.cursor/rules/api-integration.mdc +63 -0
- data/.cursor/rules/project-structure.mdc +113 -0
- data/.cursor/rules/ruby-conventions.mdc +121 -0
- data/.cursor/rules/testing-patterns.mdc +169 -0
- data/.rspec +3 -0
- data/.rspec_status +73 -0
- data/.ruby-version +1 -0
- data/CHANGELOG.md +3 -0
- data/CODE_OF_CONDUCT.md +84 -0
- data/LICENSE +21 -0
- data/README.md +297 -0
- data/Rakefile +12 -0
- data/lib/ox-tender-abstract.rb +39 -0
- data/lib/oxtenderabstract/archive_processor.rb +175 -0
- data/lib/oxtenderabstract/client.rb +347 -0
- data/lib/oxtenderabstract/compatibility.rb +11 -0
- data/lib/oxtenderabstract/configuration.rb +60 -0
- data/lib/oxtenderabstract/document_types.rb +42 -0
- data/lib/oxtenderabstract/engine.rb +11 -0
- data/lib/oxtenderabstract/errors.rb +24 -0
- data/lib/oxtenderabstract/logger.rb +42 -0
- data/lib/oxtenderabstract/result.rb +31 -0
- data/lib/oxtenderabstract/version.rb +5 -0
- data/lib/oxtenderabstract/xml_parser.rb +529 -0
- data/lib/ruby/ox-tender-abstract.rb +2 -0
- metadata +229 -0
@@ -0,0 +1,42 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'logger'
|
4
|
+
|
5
|
+
module OxTenderAbstract
|
6
|
+
# Simple logging module for the library
|
7
|
+
# Mixin that gives a class (and its instances) access to the library logger
# plus a set of `log_<severity>` helpers that tag every message with the
# name of the class that emitted it.
module ContextualLogger
  # Class-level API added to every class that includes ContextualLogger.
  module ClassMethods
    # Logger taken from OxTenderAbstract.configuration, memoized per class
    # the first time it is requested.
    def logger
      @logger ||= OxTenderAbstract.configuration.logger
    end
  end

  # Hook run on `include`: exposes ClassMethods on the including class.
  def self.included(base)
    base.extend(ClassMethods)
  end

  # Instances share the logger held by their class.
  def logger
    self.class.logger
  end

  # Generates log_debug, log_info, log_warn, log_error and log_fatal.
  # Each one forwards "[ClassName] message" to the logger method of the
  # same severity.
  %i[debug info warn error fatal].each do |severity|
    define_method(:"log_#{severity}") do |message|
      logger.public_send(severity, "[#{self.class.name}] #{message}")
    end
  end
end
|
42
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module OxTenderAbstract
|
4
|
+
# Result structure for API operations
|
5
|
+
# Outcome of an API operation: a success flag plus either the produced
# data or an error, and a free-form metadata hash.
class Result
  attr_reader :success, :data, :error, :metadata

  class << self
    # Builds a successful result carrying +data+.
    def success(data, metadata = {})
      new(success: true, data: data, metadata: metadata)
    end

    # Builds a failed result carrying +error+.
    def failure(error, metadata = {})
      new(success: false, error: error, metadata: metadata)
    end
  end

  # @param success [Object] flag returned as-is by #success?
  # @param data [Object, nil] payload of the operation
  # @param error [Object, nil] error description
  # @param metadata [Hash] extra information about the operation
  def initialize(success:, data: nil, error: nil, metadata: {})
    @success = success
    @data = data
    @error = error
    @metadata = metadata
  end

  # The stored success flag, unchanged.
  def success? = @success

  # Logical negation of the stored success flag.
  def failure? = !@success
end
|
31
|
+
end
|
@@ -0,0 +1,529 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'time'
require 'nokogiri'
|
4
|
+
|
5
|
+
module OxTenderAbstract
|
6
|
+
# XML parser for tender documents
|
7
|
+
class XmlParser
|
8
|
+
include ContextualLogger
|
9
|
+
|
10
|
+
# Takes no arguments and sets up no instance state.
def initialize; end
|
13
|
+
|
14
|
+
# Parse XML document and return structured data
|
15
|
+
# Parses an XML string and extracts data according to the detected
# document type.
#
# @param xml_content [String, nil] raw XML
# @return [Result] success with :document_type, :root_element, :namespace
#   and :content; failure when the input is nil/empty, when Nokogiri
#   reports parse errors, or when a StandardError is raised while parsing
def parse(xml_content)
  return Result.failure('Empty XML content') if xml_content.nil? || xml_content.empty?

  begin
    document = Nokogiri::XML(xml_content)

    # Any error recorded by Nokogiri makes the document unusable here.
    return Result.failure('Invalid XML') unless document.errors.empty?

    kind = detect_document_type(document)

    # Type-specific extractor; anything unrecognized gets the generic one.
    extractors = {
      tender: :parse_tender_document,
      contract: :parse_contract_document,
      organization: :parse_organization_document
    }
    content = send(extractors.fetch(kind, :parse_generic_document), document)

    Result.success({
                     document_type: kind,
                     root_element: document.root.name,
                     namespace: document.root.namespace&.href,
                     content: content
                   })
  rescue StandardError => e
    Result.failure("XML parsing error: #{e.message}")
  end
end
|
49
|
+
|
50
|
+
# Extract attachments information from XML
|
51
|
+
# Collects attachment descriptions from an XML string.
#
# The lookup patterns overlap (for example '//ns4:attachmentInfo' also
# matches every node found by '//ns5:attachmentsInfo//ns4:attachmentInfo'),
# so matches are merged as a set: each attachment node is reported once.
# A pattern that cannot be evaluated (e.g. its prefix is not declared in
# the document) is logged at debug level and skipped instead of failing
# the whole extraction.
#
# @param xml_content [String, nil] raw XML
# @return [Result] success with :attachments (array of hashes) and
#   :total_count; failure for nil/empty input or when a StandardError is
#   raised during extraction
def extract_attachments(xml_content)
  return Result.failure('Empty XML content') if xml_content.nil? || xml_content.empty?

  begin
    doc = Nokogiri::XML(xml_content)
    namespaces = extract_namespaces(doc)

    # Common attachment paths, prefixed and un-prefixed
    attachment_paths = [
      '//ns4:attachmentInfo',
      '//attachmentInfo',
      '//ns5:attachmentsInfo//ns4:attachmentInfo',
      '//attachmentsInfo//attachmentInfo'
    ]

    # NodeSet#| merges without duplicates, keeping first-seen order.
    attachment_nodes = attachment_paths.reduce(Nokogiri::XML::NodeSet.new(doc)) do |found, path|
      found | doc.xpath(path, namespaces)
    rescue Nokogiri::XML::XPath::SyntaxError => e
      log_debug "XPath error for '#{path}': #{e.message}"
      found
    end

    attachments = attachment_nodes.map { |node| extract_attachment_info(node) }.compact

    Result.success({
                     attachments: attachments,
                     total_count: attachments.size
                   })
  rescue StandardError => e
    Result.failure("Attachment extraction error: #{e.message}")
  end
end
|
84
|
+
|
85
|
+
private
|
86
|
+
|
87
|
+
# Classifies a parsed document as :tender, :contract, :organization or
# :unknown.
#
# The lower-cased root element name is checked first, in the order
# tender -> contract -> organization. When the name is inconclusive the
# document is probed for purchaseNumber / contractNumber elements, with
# and without a namespace.
def detect_document_type(doc)
  root_name = doc.root.name.downcase

  name_rules = {
    tender: [/notification/, /tender/, /auction/],
    contract: [/contract/],
    organization: [/organization/, /org/]
  }
  by_name = name_rules.find do |_type, patterns|
    patterns.any? { |pattern| pattern.match?(root_name) }
  end
  return by_name.first if by_name

  # Additional detection based on content (plain name first, then any
  # element with that local name regardless of namespace)
  element_present = lambda do |name|
    doc.xpath("//#{name}").any? || doc.xpath("//*[local-name()='#{name}']").any?
  end

  if element_present.call('purchaseNumber')
    :tender
  elsif element_present.call('contractNumber')
    :contract
  else
    :unknown
  end
end
|
108
|
+
|
109
|
+
# Builds the field hash for a tender document.
#
# Every field is looked up through a list of candidate XPath expressions
# (prefixed and un-prefixed variants); the first one yielding non-empty
# text wins. Date and price fields are post-processed by
# extract_date_from_text / extract_price_from_text. Keys whose value is
# nil are dropped from the returned hash.
#
# @param doc [Nokogiri::XML::Document]
# @return [Hash] tender attributes plus :procedure_info, :lot_info and
#   :guarantee_info
def parse_tender_document(doc)
  namespaces = extract_namespaces(doc)

  log_debug "Parsing tender document with namespaces: #{namespaces.keys}"

  # Lookup shortcuts bound to this document and its namespaces.
  text = ->(*paths) { find_text_with_namespaces(doc, paths, namespaces) }
  date = ->(*paths) { extract_date_from_text(text.call(*paths)) }
  price = ->(*paths) { extract_price_from_text(text.call(*paths)) }

  tender_data = {
    # Basic tender information
    reestr_number: text.call('//ns5:purchaseNumber', '//purchaseNumber',
                             '//ns5:commonInfo/ns5:purchaseNumber', '//commonInfo/purchaseNumber'),
    doc_number: text.call('//ns5:docNumber', '//docNumber', '//ns5:commonInfo/ns5:docNumber'),
    title: text.call('//ns5:purchaseObjectInfo', '//purchaseObjectInfo',
                     '//ns5:commonInfo/ns5:purchaseObjectInfo'),
    placement_type: text.call('//ns5:placingWay/ns2:name', '//placingWay/name', '//ns2:name'),
    publish_date: date.call('//ns5:publishDTInEIS', '//publishDTInEIS',
                            '//ns5:commonInfo/ns5:publishDTInEIS'),
    planned_publish_date: date.call('//ns5:plannedPublishDate', '//plannedPublishDate'),

    # Contract information
    max_price: price.call(
      '//ns5:maxPrice',
      '//maxPrice',
      '//ns5:contractConditionsInfo/ns5:maxPriceInfo/ns5:maxPrice',
      '//ns5:notificationInfo/ns5:contractConditionsInfo/ns5:maxPriceInfo/ns5:maxPrice'
    ),
    currency: text.call('//ns5:currency/ns2:name', '//currency/name', '//ns2:name[parent::currency]'),

    # Dates
    start_date: date.call('//ns5:startDT', '//startDT', '//ns5:collectingInfo/ns5:startDT'),
    end_date: date.call('//ns5:endDT', '//endDT', '//ns5:collectingInfo/ns5:endDT'),
    bidding_date: date.call('//ns5:biddingDate', '//biddingDate'),
    summarizing_date: date.call('//ns5:summarizingDate', '//summarizingDate'),

    # Organization info
    organization_name: text.call('//ns5:responsibleOrgInfo/ns5:fullName', '//responsibleOrgInfo/fullName',
                                 '//ns5:fullName', '//fullName'),
    organization_short_name: text.call('//ns5:responsibleOrgInfo/ns5:shortName',
                                       '//responsibleOrgInfo/shortName',
                                       '//ns5:shortName', '//shortName'),
    organization_inn: text.call('//ns5:responsibleOrgInfo/ns5:INN', '//responsibleOrgInfo/INN',
                                '//ns5:INN', '//INN'),
    organization_kpp: text.call('//ns5:responsibleOrgInfo/ns5:KPP', '//responsibleOrgInfo/KPP',
                                '//ns5:KPP', '//KPP'),
    organization_reg_num: text.call('//ns5:responsibleOrgInfo/ns5:regNum', '//responsibleOrgInfo/regNum',
                                    '//ns5:regNum', '//regNum'),

    # Contact information
    contact_email: text.call('//ns5:contactEMail', '//contactEMail',
                             '//ns5:responsibleInfo/ns5:contactEMail'),
    contact_phone: text.call('//ns5:contactPhone', '//contactPhone',
                             '//ns5:responsibleInfo/ns5:contactPhone'),
    contact_fax: text.call('//ns5:contactFax', '//contactFax', '//ns5:responsibleInfo/ns5:contactFax'),

    # Contact person details
    contact_person_name: extract_contact_person_name(doc, namespaces),

    # Address information
    post_address: text.call('//ns5:responsibleOrgInfo/ns5:postAddress', '//responsibleOrgInfo/postAddress',
                            '//ns5:postAddress', '//postAddress'),
    fact_address: text.call('//ns5:responsibleOrgInfo/ns5:factAddress', '//responsibleOrgInfo/factAddress',
                            '//ns5:factAddress', '//factAddress'),

    # Electronic trading platform
    etp_name: text.call('//ns5:ETP/ns2:name', '//ETP/name'),
    etp_url: text.call('//ns5:ETP/ns2:url', '//ETP/url'),
    etp_code: text.call('//ns5:ETP/ns2:code', '//ETP/code'),

    # URLs
    href: text.call('//ns5:href', '//href'),
    print_form_url: text.call('//ns5:printFormInfo/ns4:url', '//printFormInfo/url'),

    # Additional fields
    version_number: text.call('//ns5:versionNumber', '//versionNumber'),
    external_id: text.call('//ns5:externalId', '//externalId'),

    # Contract guarantee
    contract_guarantee_amount: price.call('//ns5:contractGuarantee/ns5:amount', '//contractGuarantee/amount'),
    contract_guarantee_part: text.call('//ns5:contractGuarantee/ns5:part', '//contractGuarantee/part')&.to_f,

    # Additional service signs (always true/false, so never compacted away)
    is_include_koks: text.call('//ns5:serviceSigns/ns5:isIncludeKOKS', '//serviceSigns/isIncludeKOKS') == 'true',

    # Customer information
    customer_full_name: text.call('//ns5:customer/ns2:fullName', '//customer/fullName'),
    customer_reg_num: text.call('//ns5:customer/ns2:regNum', '//customer/regNum')
  }

  # Nested sections produced by dedicated extractors
  tender_data[:procedure_info] = extract_procedure_info(doc, namespaces)
  tender_data[:lot_info] = extract_lot_information(doc, namespaces)
  tender_data[:guarantee_info] = extract_guarantee_info(doc, namespaces)

  # Drop nil values (empty hashes and false are kept)
  tender_data.compact
end
|
337
|
+
|
338
|
+
# Extracts the (currently minimal) set of contract fields.
#
# @param doc [Nokogiri::XML::Document]
# @return [Hash] :contract_number (nil when not found) and
#   :document_parsed_at, the time this method ran
def parse_contract_document(doc)
  contract_number = find_text_with_namespaces(
    doc, %w[//contractNumber //ns5:contractNumber], extract_namespaces(doc)
  )

  # Add contract-specific parsing logic here
  { contract_number: contract_number, document_parsed_at: Time.now }
end
|
351
|
+
|
352
|
+
# Extracts the (currently minimal) set of organization fields.
#
# @param doc [Nokogiri::XML::Document]
# @return [Hash] :organization_name (nil when not found) and
#   :document_parsed_at, the time this method ran
def parse_organization_document(doc)
  organization_name = find_text_with_namespaces(
    doc, %w[//fullName //ns5:fullName], extract_namespaces(doc)
  )

  # Add organization-specific parsing logic here
  { organization_name: organization_name, document_parsed_at: Time.now }
end
|
365
|
+
|
366
|
+
# Fallback summary for documents of an unrecognized type.
#
# @param doc [Nokogiri::XML::Document]
# @return [Hash] root element name, root namespace URI (nil when the root
#   has no namespace), total element count and the time of parsing
def parse_generic_document(doc)
  summary = {}
  summary[:root_element] = doc.root.name
  summary[:namespace] = doc.root.namespace&.href
  summary[:element_count] = doc.xpath('//*').count
  summary[:document_parsed_at] = Time.now
  summary
end
|
374
|
+
|
375
|
+
# Describes one attachment node as a hash.
#
# Each field is read relative to +node+, trying the ns4-prefixed element
# and its un-prefixed twin. :file_size is converted with to_i and
# :doc_date goes through extract_date_from_text. Fields that resolve to
# nil are omitted.
#
# @param node [Nokogiri::XML::Node] an attachmentInfo element
# @return [Hash]
def extract_attachment_info(node)
  read = ->(xpath) { extract_text_from_node(node, xpath) }

  info = {}
  info[:published_content_id] = read.call('.//ns4:publishedContentId | .//publishedContentId')
  info[:file_name] = read.call('.//ns4:fileName | .//fileName')
  info[:file_size] = read.call('.//ns4:fileSize | .//fileSize')&.to_i
  info[:description] = read.call('.//ns4:docDescription | .//docDescription')
  info[:url] = read.call('.//ns4:url | .//url')
  info[:doc_kind] = read.call('.//ns4:docKindInfo/ns2:name | .//docKindInfo/name | .//ns2:name')
  info[:doc_date] = extract_date_from_text(read.call('.//ns4:docDate | .//docDate'))
  info.compact
end
|
386
|
+
|
387
|
+
# Namespace declarations of +doc+, exactly as returned by
# doc.collect_namespaces; the result is passed to the XPath lookups.
def extract_namespaces(doc) = doc.collect_namespaces
|
390
|
+
|
391
|
+
# Returns the stripped text of the first XPath in +xpaths+ that resolves
# to a node with non-empty text, or nil when none does.
#
# An expression that raises (any StandardError) is logged at debug level
# and skipped, so one bad candidate does not prevent the others from
# being tried.
#
# @param doc [#at_xpath] document or node to search
# @param xpaths [Array<String>] candidate expressions, in priority order
# @param namespaces [Hash] namespaces handed to at_xpath
# @return [String, nil]
def find_text_with_namespaces(doc, xpaths, namespaces)
  xpaths.each do |xpath|
    begin
      candidate = doc.at_xpath(xpath, namespaces)&.text&.strip
      return candidate unless candidate.nil? || candidate.empty?
    rescue StandardError => e
      log_debug "XPath error for '#{xpath}': #{e.message}"
    end
  end

  nil
end
|
402
|
+
|
403
|
+
# Returns the stripped text of the first node matching +xpath+ relative
# to +node+, or nil when nothing matches.
#
# Callers pass unions such as './/ns4:url | .//url'. The expression is
# evaluated without an explicit namespace hash, so a prefix the lookup
# cannot resolve makes the WHOLE union raise
# Nokogiri::XML::XPath::SyntaxError, even though the un-prefixed
# alternative is valid. In that case the union is split on '|' and each
# alternative is tried on its own; alternatives that still cannot be
# evaluated are skipped.
#
# NOTE: the fallback split is purely textual; it assumes '|' is used only
# as the top-level union operator, which holds for the callers in this
# file.
#
# @param node [#at_xpath] context node
# @param xpath [String] XPath expression, optionally a union
# @return [String, nil]
def extract_text_from_node(node, xpath)
  node.at_xpath(xpath)&.text&.strip
rescue Nokogiri::XML::XPath::SyntaxError => e
  log_debug "XPath error for '#{xpath}': #{e.message}"

  xpath.split('|').each do |alternative|
    match = node.at_xpath(alternative.strip)
    return match.text&.strip if match
  rescue Nokogiri::XML::XPath::SyntaxError
    next
  end

  nil
end
|
406
|
+
|
407
|
+
# Normalizes a price string to "<digits>" or "<digits>.<digits>".
#
# Everything except digits, '.' and ',' is removed, commas become dots,
# and the result is returned only if it is a plain decimal number.
# Returns nil for nil/empty input, for text that does not reduce to such
# a number (e.g. more than one separator), and when any StandardError is
# raised along the way.
#
# @param text [String, nil]
# @return [String, nil]
def extract_price_from_text(text)
  return if text.nil? || text.empty?

  # Keep only digits and separators, then unify the decimal separator.
  normalized = text.gsub(/[^\d.,]/, '').tr(',', '.')

  normalized if normalized =~ /^\d+(\.\d+)?$/
rescue StandardError
  nil
end
|
422
|
+
|
423
|
+
# Converts a date/time string into a Time, or nil when it cannot be
# parsed (or the input is nil/empty).
#
# Parsing order:
# 1. Time.iso8601 - strict XML Schema / ISO 8601 parsing. This goes first
#    because Time.strptime ignores unparsed trailing input: a value with
#    fractional seconds such as "2024-01-15T10:30:00.500+03:00" would
#    otherwise match the zone-less '%Y-%m-%dT%H:%M:%S' format and silently
#    lose both the fraction and the UTC offset.
# 2. The explicit strptime formats below, in order.
# 3. Time.parse as a last resort.
#
# Requires the 'time' standard library (Time.iso8601, Time.strptime,
# Time.parse).
#
# @param text [String, nil]
# @return [Time, nil]
def extract_date_from_text(text)
  return nil if text.nil? || text.empty?

  begin
    return Time.iso8601(text)
  rescue ArgumentError
    # Not strict ISO 8601; fall through to the looser formats.
  end

  # Try to parse various date formats
  [
    '%Y-%m-%dT%H:%M:%S%z', # ISO 8601 with timezone
    '%Y-%m-%dT%H:%M:%S',   # ISO 8601 without timezone
    '%Y-%m-%d%z',          # Date with timezone
    '%Y-%m-%d',            # Simple date
    '%d.%m.%Y',            # Russian format
    '%d/%m/%Y'             # Alternative format
  ].each do |format|
    return Time.strptime(text, format)
  rescue ArgumentError
    next
  end

  # Try natural parsing as fallback
  begin
    Time.parse(text)
  rescue StandardError
    nil
  end
end
|
447
|
+
|
448
|
+
# Reads the start and end timestamps found under collectingInfo.
#
# @param doc [Nokogiri::XML::Document]
# @param namespaces [Hash] namespaces for the XPath lookups
# @return [Hash] :collecting_start and/or :collecting_end as returned by
#   extract_date_from_text; keys whose value is nil are omitted
def extract_procedure_info(doc, namespaces)
  { collecting_start: 'startDT', collecting_end: 'endDT' }.each_with_object({}) do |(key, tag), info|
    raw = find_text_with_namespaces(
      doc, ["//ns5:collectingInfo/ns5:#{tag}", "//collectingInfo/#{tag}"], namespaces
    )
    moment = extract_date_from_text(raw)
    info[key] = moment unless moment.nil?
  end
end
|
461
|
+
|
462
|
+
# Collects lot descriptions from the document.
#
# The prefixed and un-prefixed lotInfo paths are queried separately: a
# single union ('//ns5:lotInfo | //lotInfo') raises
# Nokogiri::XML::XPath::SyntaxError as a whole when the ns5 prefix is not
# declared, which would abort parsing of documents that do not use that
# prefix. A path that cannot be evaluated is logged at debug level and
# treated as "no match". Prefixed matches are listed before un-prefixed
# ones.
#
# Lot fields are read through find_text_with_namespaces, so they use the
# same namespaces as the document-level lookups, tolerate undeclared
# prefixes, and treat blank text as missing.
#
# @param doc [Nokogiri::XML::Document]
# @param namespaces [Hash] namespaces for the XPath lookups
# @return [Hash] {} when no lot nodes exist, otherwise :lots (array of
#   hashes with :lot_number, :lot_name, :max_price where present) and
#   :lots_count
def extract_lot_information(doc, namespaces)
  lot_nodes = ['//ns5:lotInfo', '//lotInfo'].flat_map do |path|
    doc.xpath(path, namespaces).to_a
  rescue Nokogiri::XML::XPath::SyntaxError => e
    log_debug "XPath error for '#{path}': #{e.message}"
    []
  end
  return {} if lot_nodes.empty?

  lots = lot_nodes.map do |lot_node|
    {
      lot_number: find_text_with_namespaces(lot_node, ['.//ns5:lotNumber', './/lotNumber'], namespaces),
      lot_name: find_text_with_namespaces(lot_node, ['.//ns5:lotName', './/lotName'], namespaces),
      max_price: extract_price_from_text(
        find_text_with_namespaces(lot_node, ['.//ns5:maxPrice', './/maxPrice'], namespaces)
      )
    }.compact
  end

  { lots: lots, lots_count: lots.size }
end
|
476
|
+
|
477
|
+
# Reads the "part" value of the contract and application guarantees.
#
# @param doc [Nokogiri::XML::Document]
# @param namespaces [Hash] namespaces for the XPath lookups
# @return [Hash] :contract_guarantee_part and/or
#   :application_guarantee_part converted with to_f; a key is omitted
#   when no text was found for it
def extract_guarantee_info(doc, namespaces)
  %w[contract application].each_with_object({}) do |kind, info|
    raw = find_text_with_namespaces(
      doc, ["//ns5:#{kind}Guarantee/ns5:part", "//#{kind}Guarantee/part"], namespaces
    )
    info[:"#{kind}_guarantee_part"] = raw.to_f unless raw.nil?
  end
end
|
490
|
+
|
491
|
+
# Resolves the contact person's name.
#
# A ready-made fullName under responsibleInfo/contactPersonInfo is
# preferred. Otherwise the name is assembled from lastName, firstName and
# middleName (in that order, joined by a space, missing parts skipped).
#
# @param doc [Nokogiri::XML::Document]
# @param namespaces [Hash] namespaces for the XPath lookups
# @return [String, nil] nil when neither a full name nor any part exists
def extract_contact_person_name(doc, namespaces)
  full_name = find_text_with_namespaces(
    doc,
    [
      '//ns5:responsibleInfo/ns5:contactPersonInfo/ns2:fullName',
      '//responsibleInfo/contactPersonInfo/fullName'
    ],
    namespaces
  )
  return full_name if full_name

  # No full name: look up each part through the same four path shapes.
  parts = %w[lastName firstName middleName].map do |tag|
    find_text_with_namespaces(
      doc,
      [
        "//ns5:responsibleInfo/ns5:contactPersonInfo/ns4:#{tag}",
        "//ns5:contactPersonInfo/ns4:#{tag}",
        "//responsibleInfo/contactPersonInfo/#{tag}",
        "//contactPersonInfo/#{tag}"
      ],
      namespaces
    )
  end.compact

  parts.empty? ? nil : parts.join(' ')
end
|
528
|
+
end
|
529
|
+
end
|