ox-tender-abstract 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'logger'
4
+
5
module OxTenderAbstract
  # Mixin that gives a class tagged logging helpers.
  #
  # Including it extends the host class with ClassMethods, so the logger is
  # read from OxTenderAbstract.configuration the first time it is needed and
  # then kept on that class. Every log_<severity> helper prefixes the message
  # with the host class name in square brackets.
  module ContextualLogger
    # Class-level half of the mixin.
    module ClassMethods
      # Logger kept on the extended class (memoized after the first lookup).
      def logger
        @logger ||= OxTenderAbstract.configuration.logger
      end
    end

    class << self
      # Hook invoked by Ruby when the module is included into +base+.
      def included(base)
        base.extend(ClassMethods)
      end
    end

    # Instance-level access to the logger held by the class.
    def logger
      self.class.logger
    end

    # Defines log_debug, log_info, log_warn, log_error and log_fatal; each
    # takes a single message and forwards it to the matching logger method.
    %i[debug info warn error fatal].each do |severity|
      define_method(:"log_#{severity}") do |message|
        logger.public_send(severity, "[#{self.class.name}] #{message}")
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
module OxTenderAbstract
  # Outcome of an API operation.
  #
  # Carries a success flag together with either a data payload or an error,
  # plus a free-form metadata hash. Use Result.success / Result.failure to
  # build instances.
  class Result
    attr_reader :success, :data, :error, :metadata

    class << self
      # Builds a successful result wrapping +data+.
      def success(data, metadata = {})
        new(success: true, data: data, metadata: metadata)
      end

      # Builds a failed result wrapping +error+.
      def failure(error, metadata = {})
        new(success: false, error: error, metadata: metadata)
      end
    end

    def initialize(success:, data: nil, error: nil, metadata: {})
      @success = success
      @data = data
      @error = error
      @metadata = metadata
    end

    # Returns the success flag exactly as it was given to the constructor.
    def success?
      @success
    end

    # Logical negation of the success flag.
    def failure?
      !@success
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module OxTenderAbstract
  # Version string of this library.
  VERSION = '0.0.1'
end
@@ -0,0 +1,529 @@
1
+ # frozen_string_literal: true
2
+
3
require 'nokogiri'
require 'time'
4
+
5
+ module OxTenderAbstract
6
+ # XML parser for tender documents
7
+ class XmlParser
8
+ include ContextualLogger
9
+
10
# Creates a parser. Nothing is configured or stored at construction time.
def initialize; end
13
+
14
+ # Parse XML document and return structured data
15
# Parses an XML string and wraps the outcome in a Result.
#
# Returns a failure Result for nil/empty input, for documents on which the
# parser reported errors, and for any StandardError raised while parsing.
# On success the payload holds the detected document type, the root element
# name, the root namespace href (nil when absent) and the type-specific
# content hash.
def parse(xml_content)
  return Result.failure('Empty XML content') if xml_content.nil? || xml_content.empty?

  begin
    document = Nokogiri::XML(xml_content)
    return Result.failure('Invalid XML') if document.errors.any?

    kind = detect_document_type(document)

    # Any type without a dedicated parser falls back to the generic one.
    handler = {
      tender: :parse_tender_document,
      contract: :parse_contract_document,
      organization: :parse_organization_document
    }.fetch(kind, :parse_generic_document)
    content = send(handler, document)

    root = document.root
    summary = {
      document_type: kind,
      root_element: root.name,
      namespace: root.namespace&.href,
      content: content
    }
    Result.success(summary)
  rescue StandardError => e
    Result.failure("XML parsing error: #{e.message}")
  end
end
49
+
50
+ # Extract attachments information from XML
51
# Collects attachment descriptors from an XML string.
#
# Each candidate XPath is evaluated on its own. A path whose namespace prefix
# is not declared in the document is logged at debug level and skipped, so
# the remaining (e.g. unprefixed) paths are still tried. Several candidate
# paths can select the same element, so matches are de-duplicated by node
# identity before being converted.
#
# Returns a Result whose data is { attachments: [...], total_count: n }, or
# a failure Result for nil/empty input or any other StandardError.
def extract_attachments(xml_content)
  return Result.failure('Empty XML content') if xml_content.nil? || xml_content.empty?

  begin
    doc = Nokogiri::XML(xml_content)
    namespaces = extract_namespaces(doc)

    # Common attachment paths
    attachment_paths = [
      '//ns4:attachmentInfo',
      '//attachmentInfo',
      '//ns5:attachmentsInfo//ns4:attachmentInfo',
      '//attachmentsInfo//attachmentInfo'
    ]

    matched = attachment_paths.flat_map do |path|
      doc.xpath(path, namespaces).to_a
    rescue Nokogiri::XML::XPath::SyntaxError => e
      log_debug "XPath error for '#{path}': #{e.message}"
      []
    end

    # The nested paths are subsets of the broad ones; keep each node once.
    attachment_nodes = matched.uniq(&:pointer_id)

    attachments = attachment_nodes.map { |node| extract_attachment_info(node) }.compact

    Result.success({
                     attachments: attachments,
                     total_count: attachments.size
                   })
  rescue StandardError => e
    Result.failure("Attachment extraction error: #{e.message}")
  end
end
84
+
85
+ private
86
+
87
# Classifies a document as :tender, :contract, :organization or :unknown.
#
# The lower-cased root element name is checked first, in that priority
# order. When the name is inconclusive, the document is probed for a
# purchaseNumber element (tender) and then a contractNumber element
# (contract), both with and without namespace awareness.
def detect_document_type(doc)
  root_name = doc.root.name.downcase
  mentions = ->(*words) { words.any? { |word| root_name.include?(word) } }

  return :tender if mentions.call('notification', 'tender', 'auction')
  return :contract if mentions.call('contract')
  return :organization if mentions.call('organization', 'org')

  # Plain lookup first, then a local-name() match that ignores namespaces.
  present = lambda do |tag|
    doc.xpath("//#{tag}").any? || doc.xpath("//*[local-name()='#{tag}']").any?
  end

  return :tender if present.call('purchaseNumber')
  return :contract if present.call('contractNumber')

  :unknown
end
108
+
109
# Builds the tender attribute hash from a parsed document.
#
# Every scalar field is described by one row of +field_specs+: the result
# key, the converter applied to the located text, and the XPath candidates
# that find_text_with_namespaces tries in order. The procedure, lot and
# guarantee sections are appended afterwards and nil entries are dropped.
def parse_tender_document(doc)
  namespaces = extract_namespaces(doc)

  log_debug "Parsing tender document with namespaces: #{namespaces.keys}"

  converters = {
    text: ->(raw) { raw },
    date: ->(raw) { extract_date_from_text(raw) },
    price: ->(raw) { extract_price_from_text(raw) },
    float: ->(raw) { raw&.to_f },
    # Always true or false, so this entry survives the final compact.
    flag: ->(raw) { raw == 'true' }
  }

  field_specs = [
    # Basic tender information
    [:reestr_number, :text,
     %w[//ns5:purchaseNumber //purchaseNumber //ns5:commonInfo/ns5:purchaseNumber //commonInfo/purchaseNumber]],
    [:doc_number, :text, %w[//ns5:docNumber //docNumber //ns5:commonInfo/ns5:docNumber]],
    [:title, :text,
     %w[//ns5:purchaseObjectInfo //purchaseObjectInfo //ns5:commonInfo/ns5:purchaseObjectInfo]],
    [:placement_type, :text, %w[//ns5:placingWay/ns2:name //placingWay/name //ns2:name]],
    [:publish_date, :date, %w[//ns5:publishDTInEIS //publishDTInEIS //ns5:commonInfo/ns5:publishDTInEIS]],
    [:planned_publish_date, :date, %w[//ns5:plannedPublishDate //plannedPublishDate]],

    # Contract information
    [:max_price, :price,
     %w[
       //ns5:maxPrice
       //maxPrice
       //ns5:contractConditionsInfo/ns5:maxPriceInfo/ns5:maxPrice
       //ns5:notificationInfo/ns5:contractConditionsInfo/ns5:maxPriceInfo/ns5:maxPrice
     ]],
    [:currency, :text, %w[//ns5:currency/ns2:name //currency/name //ns2:name[parent::currency]]],

    # Dates
    [:start_date, :date, %w[//ns5:startDT //startDT //ns5:collectingInfo/ns5:startDT]],
    [:end_date, :date, %w[//ns5:endDT //endDT //ns5:collectingInfo/ns5:endDT]],
    [:bidding_date, :date, %w[//ns5:biddingDate //biddingDate]],
    [:summarizing_date, :date, %w[//ns5:summarizingDate //summarizingDate]],

    # Organization info
    [:organization_name, :text,
     %w[//ns5:responsibleOrgInfo/ns5:fullName //responsibleOrgInfo/fullName //ns5:fullName //fullName]],
    [:organization_short_name, :text,
     %w[//ns5:responsibleOrgInfo/ns5:shortName //responsibleOrgInfo/shortName //ns5:shortName //shortName]],
    [:organization_inn, :text,
     %w[//ns5:responsibleOrgInfo/ns5:INN //responsibleOrgInfo/INN //ns5:INN //INN]],
    [:organization_kpp, :text,
     %w[//ns5:responsibleOrgInfo/ns5:KPP //responsibleOrgInfo/KPP //ns5:KPP //KPP]],
    [:organization_reg_num, :text,
     %w[//ns5:responsibleOrgInfo/ns5:regNum //responsibleOrgInfo/regNum //ns5:regNum //regNum]],

    # Contact information
    [:contact_email, :text, %w[//ns5:contactEMail //contactEMail //ns5:responsibleInfo/ns5:contactEMail]],
    [:contact_phone, :text, %w[//ns5:contactPhone //contactPhone //ns5:responsibleInfo/ns5:contactPhone]],
    [:contact_fax, :text, %w[//ns5:contactFax //contactFax //ns5:responsibleInfo/ns5:contactFax]],

    # Contact person details (resolved by extract_contact_person_name)
    [:contact_person_name, :contact_person, nil],

    # Address information
    [:post_address, :text,
     %w[//ns5:responsibleOrgInfo/ns5:postAddress //responsibleOrgInfo/postAddress //ns5:postAddress //postAddress]],
    [:fact_address, :text,
     %w[//ns5:responsibleOrgInfo/ns5:factAddress //responsibleOrgInfo/factAddress //ns5:factAddress //factAddress]],

    # Electronic trading platform
    [:etp_name, :text, %w[//ns5:ETP/ns2:name //ETP/name]],
    [:etp_url, :text, %w[//ns5:ETP/ns2:url //ETP/url]],
    [:etp_code, :text, %w[//ns5:ETP/ns2:code //ETP/code]],

    # URLs
    [:href, :text, %w[//ns5:href //href]],
    [:print_form_url, :text, %w[//ns5:printFormInfo/ns4:url //printFormInfo/url]],

    # Additional fields
    [:version_number, :text, %w[//ns5:versionNumber //versionNumber]],
    [:external_id, :text, %w[//ns5:externalId //externalId]],

    # Contract guarantee
    [:contract_guarantee_amount, :price, %w[//ns5:contractGuarantee/ns5:amount //contractGuarantee/amount]],
    [:contract_guarantee_part, :float, %w[//ns5:contractGuarantee/ns5:part //contractGuarantee/part]],

    # Additional service signs
    [:is_include_koks, :flag, %w[//ns5:serviceSigns/ns5:isIncludeKOKS //serviceSigns/isIncludeKOKS]],

    # Customer information
    [:customer_full_name, :text, %w[//ns5:customer/ns2:fullName //customer/fullName]],
    [:customer_reg_num, :text, %w[//ns5:customer/ns2:regNum //customer/regNum]]
  ]

  tender_data = field_specs.each_with_object({}) do |(field, kind, xpaths), collected|
    collected[field] =
      if kind == :contact_person
        extract_contact_person_name(doc, namespaces)
      else
        converters.fetch(kind).call(find_text_with_namespaces(doc, xpaths, namespaces))
      end
  end

  # Additional processing
  tender_data[:procedure_info] = extract_procedure_info(doc, namespaces)
  tender_data[:lot_info] = extract_lot_information(doc, namespaces)
  tender_data[:guarantee_info] = extract_guarantee_info(doc, namespaces)

  # Clean up empty values
  tender_data.compact
end
337
+
338
# Extracts the fields currently supported for contract documents: the
# contract number (unprefixed path first, then ns5) and the time at which
# the document was parsed.
def parse_contract_document(doc)
  namespaces = extract_namespaces(doc)
  contract_number = find_text_with_namespaces(doc, %w[//contractNumber //ns5:contractNumber], namespaces)

  # Add contract-specific parsing logic here
  { contract_number: contract_number, document_parsed_at: Time.now }
end
351
+
352
# Extracts the fields currently supported for organization documents: the
# full name (unprefixed path first, then ns5) and the time at which the
# document was parsed.
def parse_organization_document(doc)
  namespaces = extract_namespaces(doc)
  full_name = find_text_with_namespaces(doc, %w[//fullName //ns5:fullName], namespaces)

  # Add organization-specific parsing logic here
  { organization_name: full_name, document_parsed_at: Time.now }
end
365
+
366
# Fallback summary for documents of unrecognised type: root element name,
# root namespace href (nil when the root has no namespace), the number of
# elements matched by '//*', and the time of parsing.
def parse_generic_document(doc)
  root = doc.root
  namespace_href = root.namespace&.href
  total_elements = doc.xpath('//*').count

  {
    root_element: root.name,
    namespace: namespace_href,
    element_count: total_elements,
    document_parsed_at: Time.now
  }
end
374
+
375
# Converts one attachment node into a hash of its known fields.
#
# Each field is read with extract_text_from_node using a prefixed/unprefixed
# XPath union. The file size is converted with to_i and the document date
# with extract_date_from_text; fields that resolve to nil are omitted.
def extract_attachment_info(node)
  read = ->(xpath) { extract_text_from_node(node, xpath) }

  content_id = read.call('.//ns4:publishedContentId | .//publishedContentId')
  file_name = read.call('.//ns4:fileName | .//fileName')
  raw_size = read.call('.//ns4:fileSize | .//fileSize')
  description = read.call('.//ns4:docDescription | .//docDescription')
  url = read.call('.//ns4:url | .//url')
  doc_kind = read.call('.//ns4:docKindInfo/ns2:name | .//docKindInfo/name | .//ns2:name')
  raw_date = read.call('.//ns4:docDate | .//docDate')

  {
    published_content_id: content_id,
    file_name: file_name,
    file_size: raw_size&.to_i,
    description: description,
    url: url,
    doc_kind: doc_kind,
    doc_date: extract_date_from_text(raw_date)
  }.compact
end
386
+
387
# Returns whatever doc.collect_namespaces reports for the document; the
# result is passed as the namespace argument of the XPath lookups.
def extract_namespaces(doc)
  doc.collect_namespaces
end
390
+
391
# Returns the first non-empty, stripped text found by trying +xpaths+ in
# order against +doc+, or nil when none yields text.
#
# A path that raises (for example because it uses a prefix missing from
# +namespaces+) is logged at debug level and skipped.
def find_text_with_namespaces(doc, xpaths, namespaces)
  xpaths.each do |candidate|
    begin
      match = doc.at_xpath(candidate, namespaces)
      value = match&.text&.strip
      return value unless value.nil? || value.empty?
    rescue StandardError => e
      log_debug "XPath error for '#{candidate}': #{e.message}"
    end
  end

  nil
end
402
+
403
# Returns the stripped text of the first node matching +xpath+ relative to
# +node+, or nil when nothing matches.
#
# The callers in this file pass unions such as './/ns4:url | .//url'. When
# one prefix in a union is not declared, the XPath engine rejects the whole
# expression, so the unprefixed alternative would never be consulted. In
# that case the alternatives are retried one at a time, in written order.
# A failing expression with a single alternative is re-raised unchanged.
#
# NOTE(review): the retry splits on '|', which is adequate for the simple
# unions used here but would mis-split a '|' inside a predicate or string.
def extract_text_from_node(node, xpath)
  node.at_xpath(xpath)&.text&.strip
rescue Nokogiri::XML::XPath::SyntaxError => e
  alternatives = xpath.split('|').map(&:strip).reject(&:empty?)
  raise if alternatives.size < 2

  log_debug "XPath error for '#{xpath}': #{e.message}; retrying alternatives separately"
  alternatives.each do |alternative|
    match = node.at_xpath(alternative)
    return match.text&.strip if match
  rescue Nokogiri::XML::XPath::SyntaxError
    next
  end
  nil
end
406
+
407
# Normalises a price string to a plain decimal string, or returns nil.
#
# Everything except digits, '.' and ',' is discarded and commas are turned
# into dots. The result is returned only when it is digits optionally
# followed by one dot and more digits; anything else, including input with
# more than one separator, yields nil. Any StandardError raised along the
# way also results in nil.
def extract_price_from_text(text)
  return if text.nil? || text.empty?

  kept = text.gsub(/[^\d.,]/, '')
  return if kept.empty?

  candidate = kept.tr(',', '.')
  # Newlines were stripped above, so \A/\z are equivalent to ^/$ here.
  candidate.match?(/\A\d+(\.\d+)?\z/) ? candidate : nil
rescue StandardError
  nil
end
422
+
423
# Converts a textual timestamp into a Time, or returns nil when the text
# cannot be interpreted.
#
# The explicit formats are tried in order; the first one Time.strptime
# accepts wins. Time.strptime does not reject trailing characters, so the
# more specific formats must come before their prefixes: without the
# fractional-second variants, a value like '2024-01-15T10:30:00.500+03:00'
# would be accepted by the zone-less format and its UTC offset dropped.
# Time.parse is the last resort.
#
# Time.strptime and Time.parse come from the standard 'time' library, which
# must be required by the file.
def extract_date_from_text(text)
  return nil if text.nil? || text.empty?

  formats = [
    '%Y-%m-%dT%H:%M:%S.%N%z', # ISO 8601 with fractional seconds and timezone
    '%Y-%m-%dT%H:%M:%S%z',    # ISO 8601 with timezone
    '%Y-%m-%dT%H:%M:%S.%N',   # ISO 8601 with fractional seconds, no timezone
    '%Y-%m-%dT%H:%M:%S',      # ISO 8601 without timezone
    '%Y-%m-%d%z',             # Date with timezone
    '%Y-%m-%d',               # Simple date
    '%d.%m.%Y',               # Russian format
    '%d/%m/%Y'                # Alternative format
  ]

  formats.each do |format|
    return Time.strptime(text, format)
  rescue ArgumentError
    next
  end

  # Try natural parsing as fallback
  begin
    Time.parse(text)
  rescue StandardError
    nil
  end
end
447
+
448
# Reads the start and end of the collecting period from the collectingInfo
# element (ns5-prefixed path first, then unprefixed) and returns them as
# { collecting_start:, collecting_end: }, omitting values that are nil.
def extract_procedure_info(doc, namespaces)
  boundaries = { collecting_start: 'startDT', collecting_end: 'endDT' }

  boundaries.each_with_object({}) do |(key, tag), info|
    candidates = ["//ns5:collectingInfo/ns5:#{tag}", "//collectingInfo/#{tag}"]
    moment = extract_date_from_text(find_text_with_namespaces(doc, candidates, namespaces))
    info[key] = moment unless moment.nil?
  end
end
461
+
462
+ def extract_lot_information(doc, namespaces)
463
+ lot_nodes = doc.xpath('//ns5:lotInfo | //lotInfo', namespaces)
464
+ return {} if lot_nodes.empty?
465
+
466
+ lots = lot_nodes.map do |lot_node|
467
+ {
468
+ lot_number: extract_text_from_node(lot_node, './/ns5:lotNumber | .//lotNumber'),
469
+ lot_name: extract_text_from_node(lot_node, './/ns5:lotName | .//lotName'),
470
+ max_price: extract_price_from_text(extract_text_from_node(lot_node, './/ns5:maxPrice | .//maxPrice'))
471
+ }.compact
472
+ end
473
+
474
+ { lots: lots, lots_count: lots.size }
475
+ end
476
+
477
# Reads the 'part' value of the contract and application guarantees
# (ns5-prefixed path first, then unprefixed), converts each with to_f and
# returns only the ones that were found.
def extract_guarantee_info(doc, namespaces)
  %w[contract application].each_with_object({}) do |kind, info|
    candidates = ["//ns5:#{kind}Guarantee/ns5:part", "//#{kind}Guarantee/part"]
    raw = find_text_with_namespaces(doc, candidates, namespaces)
    info[:"#{kind}_guarantee_part"] = raw.to_f unless raw.nil?
  end
end
490
+
491
# Resolves the contact person's name.
#
# A ready-made full name under responsibleInfo/contactPersonInfo wins.
# Otherwise the last, first and middle names are looked up individually and
# whichever are present are joined with single spaces, in that order.
# Returns nil when no part is found.
def extract_contact_person_name(doc, namespaces)
  ready_made = find_text_with_namespaces(doc, [
                                           '//ns5:responsibleInfo/ns5:contactPersonInfo/ns2:fullName',
                                           '//responsibleInfo/contactPersonInfo/fullName'
                                         ], namespaces)
  return ready_made if ready_made

  pieces = %w[lastName firstName middleName].map do |tag|
    find_text_with_namespaces(doc, [
                                "//ns5:responsibleInfo/ns5:contactPersonInfo/ns4:#{tag}",
                                "//ns5:contactPersonInfo/ns4:#{tag}",
                                "//responsibleInfo/contactPersonInfo/#{tag}",
                                "//contactPersonInfo/#{tag}"
                              ], namespaces)
  end.compact

  pieces.empty? ? nil : pieces.join(' ')
end
528
+ end
529
+ end
@@ -0,0 +1,2 @@
1
+ require_relative '../oxtenderabstract'
2
+ require_relative '../oxtenderabstract/compatibility'