ox-tender-abstract 0.9.1 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec_status +18 -18
- data/lib/oxtenderabstract/archive_processor.rb +75 -79
- data/lib/oxtenderabstract/version.rb +1 -1
- data/lib/oxtenderabstract/xml_parser.rb +35 -15
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c1e519805302809f117b67e75099300485fc5910038e6ab6012eb9a4343f8e0b
|
4
|
+
data.tar.gz: ffccc77f4305ca644cbe5128a2f1178e8ff30170b25180686fc3db12d5a19761
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d062a1e8139143c3c86490c54b9e367f9e6053d27e2a981f8202b2496d187d56f11e2b1b2f101c1c3b9ae69045e5388f34365f9d88e77f480177603c8fd7a1f8
|
7
|
+
data.tar.gz: 57710ad7e471c7165d2878b7ffcbac5a2fbd6ffba10bc9c6c2a47af4015ca639df1d53f1cf89956858838dce7d18efdfca811a4d5f5bef2d53fb46c6a4fa14b7
|
data/.rspec_status
CHANGED
@@ -49,26 +49,26 @@ example_id | status | run_time
|
|
49
49
|
./spec/oxtenderabstract/result_spec.rb[1:3:2] | passed | 0.00002 seconds |
|
50
50
|
./spec/oxtenderabstract/result_spec.rb[1:4:1] | passed | 0.00002 seconds |
|
51
51
|
./spec/oxtenderabstract/result_spec.rb[1:4:2] | passed | 0.00002 seconds |
|
52
|
-
./spec/oxtenderabstract/xml_parser_spec.rb[1:1:1:1] | passed | 0.
|
53
|
-
./spec/oxtenderabstract/xml_parser_spec.rb[1:1:2:1] | passed | 0.
|
54
|
-
./spec/oxtenderabstract/xml_parser_spec.rb[1:1:3:1] | failed | 0.
|
55
|
-
./spec/oxtenderabstract/xml_parser_spec.rb[1:1:3:2] | passed | 0.
|
56
|
-
./spec/oxtenderabstract/xml_parser_spec.rb[1:1:3:3] | failed | 0.
|
57
|
-
./spec/oxtenderabstract/xml_parser_spec.rb[1:1:4:1] | passed | 0.
|
58
|
-
./spec/oxtenderabstract/xml_parser_spec.rb[1:1:4:2] | passed | 0.
|
59
|
-
./spec/oxtenderabstract/xml_parser_spec.rb[1:1:4:3] | passed | 0.
|
60
|
-
./spec/oxtenderabstract/xml_parser_spec.rb[1:1:5:1] | passed | 0.
|
61
|
-
./spec/oxtenderabstract/xml_parser_spec.rb[1:1:6:1] | passed | 0.
|
62
|
-
./spec/oxtenderabstract/xml_parser_spec.rb[1:1:7:1] | passed | 0.
|
63
|
-
./spec/oxtenderabstract/xml_parser_spec.rb[1:2:1:1] | failed | 0.
|
64
|
-
./spec/oxtenderabstract/xml_parser_spec.rb[1:2:2:1] | failed | 0.
|
65
|
-
./spec/oxtenderabstract/xml_parser_spec.rb[1:2:3:1] | failed | 0.
|
52
|
+
./spec/oxtenderabstract/xml_parser_spec.rb[1:1:1:1] | passed | 0.00192 seconds |
|
53
|
+
./spec/oxtenderabstract/xml_parser_spec.rb[1:1:2:1] | passed | 0.00007 seconds |
|
54
|
+
./spec/oxtenderabstract/xml_parser_spec.rb[1:1:3:1] | failed | 0.00615 seconds |
|
55
|
+
./spec/oxtenderabstract/xml_parser_spec.rb[1:1:3:2] | passed | 0.00098 seconds |
|
56
|
+
./spec/oxtenderabstract/xml_parser_spec.rb[1:1:3:3] | failed | 0.00089 seconds |
|
57
|
+
./spec/oxtenderabstract/xml_parser_spec.rb[1:1:4:1] | passed | 0.00194 seconds |
|
58
|
+
./spec/oxtenderabstract/xml_parser_spec.rb[1:1:4:2] | passed | 0.00168 seconds |
|
59
|
+
./spec/oxtenderabstract/xml_parser_spec.rb[1:1:4:3] | passed | 0.00086 seconds |
|
60
|
+
./spec/oxtenderabstract/xml_parser_spec.rb[1:1:5:1] | passed | 0.00008 seconds |
|
61
|
+
./spec/oxtenderabstract/xml_parser_spec.rb[1:1:6:1] | passed | 0.00007 seconds |
|
62
|
+
./spec/oxtenderabstract/xml_parser_spec.rb[1:1:7:1] | passed | 0.00007 seconds |
|
63
|
+
./spec/oxtenderabstract/xml_parser_spec.rb[1:2:1:1] | failed | 0.00016 seconds |
|
64
|
+
./spec/oxtenderabstract/xml_parser_spec.rb[1:2:2:1] | failed | 0.00011 seconds |
|
65
|
+
./spec/oxtenderabstract/xml_parser_spec.rb[1:2:3:1] | failed | 0.00015 seconds |
|
66
66
|
./spec/oxtenderabstract/xml_parser_spec.rb[1:3:1] | passed | 0.00005 seconds |
|
67
|
-
./spec/oxtenderabstract/xml_parser_spec.rb[1:3:2] | passed | 0.
|
67
|
+
./spec/oxtenderabstract/xml_parser_spec.rb[1:3:2] | passed | 0.00004 seconds |
|
68
68
|
./spec/oxtenderabstract/xml_parser_spec.rb[1:3:3] | passed | 0.00004 seconds |
|
69
|
-
./spec/oxtenderabstract/xml_parser_spec.rb[1:3:4] | passed | 0.
|
70
|
-
./spec/oxtenderabstract/xml_parser_spec.rb[1:4:1:1] | failed | 0.
|
71
|
-
./spec/oxtenderabstract/xml_parser_spec.rb[1:4:2:1] |
|
69
|
+
./spec/oxtenderabstract/xml_parser_spec.rb[1:3:4] | passed | 0.00006 seconds |
|
70
|
+
./spec/oxtenderabstract/xml_parser_spec.rb[1:4:1:1] | failed | 0.00131 seconds |
|
71
|
+
./spec/oxtenderabstract/xml_parser_spec.rb[1:4:2:1] | passed | 0.00015 seconds |
|
72
72
|
./spec/oxtenderabstract_spec.rb[1:1:1] | passed | 0.00053 seconds |
|
73
73
|
./spec/oxtenderabstract_spec.rb[1:2:1] | passed | 0.00003 seconds |
|
74
74
|
./spec/oxtenderabstract_spec.rb[1:3:1] | passed | 0.00003 seconds |
|
@@ -1,26 +1,26 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
7
|
-
require
|
8
|
-
require
|
3
|
+
require 'net/http'
|
4
|
+
require 'uri'
|
5
|
+
require 'openssl'
|
6
|
+
require 'zlib'
|
7
|
+
require 'stringio'
|
8
|
+
require 'zip'
|
9
9
|
|
10
10
|
module OxTenderAbstract
|
11
11
|
# Archive processor for downloading and extracting archive files
|
12
12
|
class ArchiveProcessor
|
13
13
|
include ContextualLogger
|
14
14
|
|
15
|
-
MAX_FILE_SIZE_BYTES = 100 * 1024 * 1024
|
16
|
-
|
15
|
+
MAX_FILE_SIZE_BYTES = 100 * 1024 * 1024 # 100 MB in bytes
|
16
|
+
|
17
17
|
def initialize
|
18
18
|
# Archive processor initialization
|
19
19
|
end
|
20
20
|
|
21
21
|
# Download and extract archive data
|
22
22
|
def download_and_extract(archive_url)
|
23
|
-
return Result.failure(
|
23
|
+
return Result.failure('Empty archive URL') if archive_url.nil? || archive_url.empty?
|
24
24
|
|
25
25
|
begin
|
26
26
|
# Download archive to memory
|
@@ -30,9 +30,9 @@ module OxTenderAbstract
|
|
30
30
|
content = download_result.data[:content]
|
31
31
|
|
32
32
|
# Determine archive format by first bytes
|
33
|
-
first_bytes = content[0..1].
|
33
|
+
first_bytes = content[0..1].unpack1('H*')
|
34
34
|
|
35
|
-
if first_bytes ==
|
35
|
+
if first_bytes == '1f8b'
|
36
36
|
# This is GZIP archive - decompress GZIP, then ZIP
|
37
37
|
gunzip_result = decompress_gzip(content)
|
38
38
|
return gunzip_result if gunzip_result.failure?
|
@@ -40,25 +40,25 @@ module OxTenderAbstract
|
|
40
40
|
zip_result = extract_zip_from_memory(gunzip_result.data[:content])
|
41
41
|
|
42
42
|
Result.success({
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
elsif content[0..1] ==
|
43
|
+
files: zip_result,
|
44
|
+
total_size: download_result.data[:size],
|
45
|
+
compressed_size: gunzip_result.data[:compressed_size],
|
46
|
+
file_count: zip_result.size
|
47
|
+
})
|
48
|
+
elsif content[0..1] == 'PK'
|
49
49
|
# This is already ZIP archive - parse directly
|
50
50
|
zip_result = extract_zip_from_memory(content)
|
51
51
|
|
52
52
|
Result.success({
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
53
|
+
files: zip_result,
|
54
|
+
total_size: download_result.data[:size],
|
55
|
+
compressed_size: nil,
|
56
|
+
file_count: zip_result.size
|
57
|
+
})
|
58
58
|
else
|
59
|
-
Result.failure(
|
59
|
+
Result.failure('Unknown archive format (not GZIP and not ZIP)')
|
60
60
|
end
|
61
|
-
rescue => e
|
61
|
+
rescue StandardError => e
|
62
62
|
Result.failure("Archive processing error: #{e.message}")
|
63
63
|
end
|
64
64
|
end
|
@@ -69,9 +69,7 @@ module OxTenderAbstract
|
|
69
69
|
begin
|
70
70
|
uri = URI.parse(url)
|
71
71
|
# Check if URI is valid HTTP/HTTPS
|
72
|
-
unless uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)
|
73
|
-
return Result.failure("Invalid URL: not HTTP/HTTPS")
|
74
|
-
end
|
72
|
+
return Result.failure('Invalid URL: not HTTP/HTTPS') unless uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)
|
75
73
|
rescue URI::InvalidURIError => e
|
76
74
|
return Result.failure("Invalid URL: #{e.message}")
|
77
75
|
end
|
@@ -80,8 +78,8 @@ module OxTenderAbstract
|
|
80
78
|
http = create_http_client(uri)
|
81
79
|
|
82
80
|
request = Net::HTTP::Get.new(uri.request_uri)
|
83
|
-
request[
|
84
|
-
request[
|
81
|
+
request['User-Agent'] = "OxTenderAbstract/#{OxTenderAbstract::VERSION}"
|
82
|
+
request['individualPerson_token'] = OxTenderAbstract.configuration.token
|
85
83
|
|
86
84
|
log_debug "Downloading archive from: #{url}"
|
87
85
|
|
@@ -101,20 +99,20 @@ module OxTenderAbstract
|
|
101
99
|
log_debug "Downloaded archive: #{size} bytes"
|
102
100
|
|
103
101
|
Result.success({
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
102
|
+
content: content,
|
103
|
+
size: size,
|
104
|
+
content_type: response['content-type']
|
105
|
+
})
|
108
106
|
rescue SocketError, Timeout::Error => e
|
109
107
|
Result.failure("Network error: #{e.message}")
|
110
|
-
rescue => e
|
108
|
+
rescue StandardError => e
|
111
109
|
Result.failure("Download error: #{e.message}")
|
112
110
|
end
|
113
111
|
end
|
114
112
|
|
115
113
|
def create_http_client(uri)
|
116
114
|
http = Net::HTTP.new(uri.host, uri.port)
|
117
|
-
http.use_ssl = uri.scheme ==
|
115
|
+
http.use_ssl = uri.scheme == 'https'
|
118
116
|
http.verify_mode = OxTenderAbstract.configuration.ssl_verify ? OpenSSL::SSL::VERIFY_PEER : OpenSSL::SSL::VERIFY_NONE
|
119
117
|
http.open_timeout = OxTenderAbstract.configuration.timeout_open
|
120
118
|
http.read_timeout = OxTenderAbstract.configuration.timeout_read
|
@@ -122,54 +120,52 @@ module OxTenderAbstract
|
|
122
120
|
end
|
123
121
|
|
124
122
|
def decompress_gzip(gzip_content)
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
Result.failure("Decompression error: #{e.message}")
|
141
|
-
end
|
123
|
+
log_debug 'Decompressing GZIP archive'
|
124
|
+
|
125
|
+
gz = Zlib::GzipReader.new(StringIO.new(gzip_content))
|
126
|
+
decompressed_content = gz.read
|
127
|
+
gz.close
|
128
|
+
|
129
|
+
Result.success({
|
130
|
+
content: decompressed_content,
|
131
|
+
compressed_size: gzip_content.bytesize,
|
132
|
+
decompressed_size: decompressed_content.bytesize
|
133
|
+
})
|
134
|
+
rescue Zlib::GzipFile::Error => e
|
135
|
+
Result.failure("GZIP decompression error: #{e.message}")
|
136
|
+
rescue StandardError => e
|
137
|
+
Result.failure("Decompression error: #{e.message}")
|
142
138
|
end
|
143
139
|
|
144
140
|
def extract_zip_from_memory(zip_content)
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
log_debug "Extracting file: #{entry.name} (#{entry.size} bytes)"
|
156
|
-
|
157
|
-
files[entry.name] = {
|
158
|
-
content: entry.get_input_stream.read,
|
159
|
-
size: entry.size,
|
160
|
-
compressed_size: entry.compressed_size,
|
161
|
-
crc: entry.crc
|
162
|
-
}
|
163
|
-
end
|
164
|
-
end
|
141
|
+
log_debug 'Extracting ZIP archive from memory'
|
142
|
+
|
143
|
+
files = {}
|
144
|
+
zip_io = StringIO.new(zip_content)
|
145
|
+
|
146
|
+
Zip::File.open_buffer(zip_io) do |zip_file|
|
147
|
+
zip_file.each do |entry|
|
148
|
+
next if entry.directory?
|
149
|
+
|
150
|
+
log_debug "Extracting file: #{entry.name} (#{entry.size} bytes)"
|
165
151
|
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
152
|
+
content = entry.get_input_stream.read
|
153
|
+
|
154
|
+
files[entry.name] = {
|
155
|
+
content: content,
|
156
|
+
size: entry.size,
|
157
|
+
compressed_size: entry.compressed_size,
|
158
|
+
crc: entry.crc
|
159
|
+
}
|
160
|
+
end
|
172
161
|
end
|
162
|
+
|
163
|
+
log_debug "Extracted #{files.size} files from ZIP archive"
|
164
|
+
files
|
165
|
+
rescue Zip::Error => e
|
166
|
+
raise ArchiveError, "ZIP extraction error: #{e.message}"
|
167
|
+
rescue StandardError => e
|
168
|
+
raise ArchiveError, "Archive extraction error: #{e.message}"
|
173
169
|
end
|
174
170
|
end
|
175
|
-
end
|
171
|
+
end
|
@@ -408,8 +408,11 @@ module OxTenderAbstract
|
|
408
408
|
def extract_price_from_text(text)
|
409
409
|
return nil if text.nil? || text.empty?
|
410
410
|
|
411
|
+
# Remove currency symbols and text like 'руб.', 'рублей' etc.
|
412
|
+
cleaned = text.gsub(/[а-яё]+\.?/i, '').strip
|
413
|
+
|
411
414
|
# Remove any non-digit characters except decimal separator and spaces
|
412
|
-
cleaned =
|
415
|
+
cleaned = cleaned.gsub(/[^\d\s.,]/, '').strip
|
413
416
|
return nil if cleaned.empty?
|
414
417
|
|
415
418
|
# Remove spaces (used as thousand separators in Russian format)
|
@@ -522,10 +525,13 @@ module OxTenderAbstract
|
|
522
525
|
|
523
526
|
def extract_purchase_object_data(object_node, namespaces)
|
524
527
|
# Basic object information
|
528
|
+
# CRITICAL FIX: Extract name that's direct child of purchaseObject, not from characteristics
|
529
|
+
direct_name = object_node.xpath('./ns4:name | ./name', namespaces).first&.text&.strip
|
530
|
+
|
525
531
|
object_data = {
|
526
532
|
sid: extract_text_from_node(object_node, './/ns4:sid | .//sid'),
|
527
533
|
external_sid: extract_text_from_node(object_node, './/ns4:externalSid | .//externalSid'),
|
528
|
-
name:
|
534
|
+
name: direct_name,
|
529
535
|
price: extract_price_from_text(extract_text_from_node(object_node, './/ns4:price | .//price')),
|
530
536
|
quantity: extract_text_from_node(object_node, './/ns4:quantity/ns4:value | .//quantity/value')&.to_i,
|
531
537
|
sum: extract_price_from_text(extract_text_from_node(object_node, './/ns4:sum | .//sum')),
|
@@ -573,7 +579,7 @@ module OxTenderAbstract
|
|
573
579
|
}
|
574
580
|
end
|
575
581
|
|
576
|
-
# Extract characteristics (
|
582
|
+
# Extract characteristics (detailed extraction)
|
577
583
|
characteristics_nodes = object_node.xpath(
|
578
584
|
'.//ns4:characteristics//ns4:characteristicsUsingReferenceInfo | .//characteristics//characteristicsUsingReferenceInfo', namespaces
|
579
585
|
)
|
@@ -582,32 +588,46 @@ module OxTenderAbstract
|
|
582
588
|
)
|
583
589
|
|
584
590
|
if characteristics_nodes.any?
|
591
|
+
characteristics_details = characteristics_nodes.map do |char_node|
|
592
|
+
char_data = {
|
593
|
+
name: extract_text_from_node(char_node, './/ns4:name | .//name'),
|
594
|
+
type: extract_text_from_node(char_node, './/ns4:type | .//type')
|
595
|
+
}
|
596
|
+
|
597
|
+
# Extract values from text form characteristics
|
598
|
+
values_nodes = char_node.xpath('.//ns4:values/ns4:value | .//values/value', namespaces)
|
599
|
+
if values_nodes.any?
|
600
|
+
char_data[:values] = values_nodes.map do |value_node|
|
601
|
+
extract_text_from_node(value_node, './/ns4:qualityDescription | .//qualityDescription') ||
|
602
|
+
extract_text_from_node(value_node, './/ns4:textValue | .//textValue')
|
603
|
+
end.compact
|
604
|
+
end
|
605
|
+
|
606
|
+
char_data
|
607
|
+
end
|
608
|
+
|
585
609
|
object_data[:characteristics] = {
|
586
610
|
count: characteristics_nodes.size,
|
587
|
-
details:
|
588
|
-
{
|
589
|
-
name: extract_text_from_node(char_node, './/ns4:name | .//name'),
|
590
|
-
type: extract_text_from_node(char_node, './/ns4:type | .//type')
|
591
|
-
}
|
592
|
-
end
|
611
|
+
details: characteristics_details
|
593
612
|
}
|
594
613
|
end
|
595
614
|
|
596
615
|
# Determine the actual product name from available sources
|
597
|
-
# Priority:
|
598
|
-
product_name =
|
599
|
-
|
616
|
+
# Priority: Direct name field (now fixed) > KTRU name > OKPD2 name
|
617
|
+
product_name = if object_data[:name] && !object_data[:name].empty?
|
618
|
+
object_data[:name]
|
619
|
+
elsif object_data[:ktru] && object_data[:ktru][:name] && !object_data[:ktru][:name].empty?
|
600
620
|
object_data[:ktru][:name]
|
601
621
|
elsif object_data[:okpd2] && object_data[:okpd2][:name] && !object_data[:okpd2][:name].empty?
|
602
622
|
object_data[:okpd2][:name]
|
603
623
|
else
|
604
|
-
|
624
|
+
'Unknown product'
|
605
625
|
end
|
606
626
|
|
607
627
|
object_data[:product_name] = product_name
|
608
628
|
|
609
|
-
#
|
610
|
-
object_data[:name_type] =
|
629
|
+
# Now the name field should contain actual product names, not characteristics
|
630
|
+
object_data[:name_type] = 'product_name'
|
611
631
|
|
612
632
|
object_data.compact
|
613
633
|
end
|