RubyGems - ox-tender-abstract - Versions diffs - 0.9.1 → 0.9.3 - Mend

ox-tender-abstract 0.9.1 → 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

checksums.yaml +4 -4
data/.rspec_status +18 -18
data/CHANGELOG.md +11 -0
data/README.md +35 -0
data/lib/ox-tender-abstract.rb +154 -21
data/lib/oxtenderabstract/archive_processor.rb +192 -76
data/lib/oxtenderabstract/client.rb +170 -20
data/lib/oxtenderabstract/configuration.rb +5 -1
data/lib/oxtenderabstract/document_types.rb +72 -2
data/lib/oxtenderabstract/errors.rb +21 -9
data/lib/oxtenderabstract/version.rb +1 -1
data/lib/oxtenderabstract/xml_parser.rb +164 -23
metadata +1 -1

data/lib/oxtenderabstract/archive_processor.rb CHANGED

@@ -1,38 +1,40 @@
 # frozen_string_literal: true
-require "net/http"
-require "uri"
-require "openssl"
-require "zlib"
-require "stringio"
-require "zip"
+require 'net/http'
+require 'uri'
+require 'openssl'
+require 'zlib'
+require 'stringio'
+require 'zip'
 module OxTenderAbstract
   # Archive processor for downloading and extracting archive files
   class ArchiveProcessor
     include ContextualLogger
-    MAX_FILE_SIZE_BYTES = 100 * 1024 * 1024  # 100 MB in bytes
+    MAX_FILE_SIZE_BYTES = 100 * 1024 * 1024 # 100 MB in bytes
+    MAX_RETRY_ATTEMPTS = 3
+    RETRY_DELAY_SECONDS = 2
     def initialize
       # Archive processor initialization
     end
     # Download and extract archive data
     def download_and_extract(archive_url)
-      return Result.failure("Empty archive URL") if archive_url.nil? || archive_url.empty?
+      return Result.failure('Empty archive URL') if archive_url.nil? || archive_url.empty?
       begin
-        # Download archive to memory
-        download_result = download_to_memory(archive_url)
+        # Download archive to memory with retry logic
+        download_result = download_with_retry(archive_url)
         return download_result if download_result.failure?
         content = download_result.data[:content]
         # Determine archive format by first bytes
-        first_bytes = content[0..1].unpack("H*").first
+        first_bytes = content[0..1].unpack1('H*')
-        if first_bytes == "1f8b"
+        if first_bytes == '1f8b'
           # This is GZIP archive - decompress GZIP, then ZIP
           gunzip_result = decompress_gzip(content)
           return gunzip_result if gunzip_result.failure?
@@ -40,38 +42,73 @@ module OxTenderAbstract
           zip_result = extract_zip_from_memory(gunzip_result.data[:content])
           Result.success({
-            files: zip_result,
-            total_size: download_result.data[:size],
-            compressed_size: gunzip_result.data[:compressed_size],
-            file_count: zip_result.size
-          })
-        elsif content[0..1] == "PK"
+                           files: zip_result,
+                           total_size: download_result.data[:size],
+                           compressed_size: gunzip_result.data[:compressed_size],
+                           file_count: zip_result.size
+                         })
+        elsif content[0..1] == 'PK'
           # This is already ZIP archive - parse directly
           zip_result = extract_zip_from_memory(content)
           Result.success({
-            files: zip_result,
-            total_size: download_result.data[:size],
-            compressed_size: nil,
-            file_count: zip_result.size
-          })
+                           files: zip_result,
+                           total_size: download_result.data[:size],
+                           compressed_size: nil,
+                           file_count: zip_result.size
+                         })
         else
-          Result.failure("Unknown archive format (not GZIP and not ZIP)")
+          # Log first bytes for debugging
+          log_error "Unknown archive format. First 10 bytes: #{content[0..9].unpack1('H*')}"
+          Result.failure('Unknown archive format (not GZIP and not ZIP)')
         end
-      rescue => e
+      rescue StandardError => e
+        log_error "Archive processing error: #{e.message}"
+        log_error e.backtrace.first(3).join("\n") if e.backtrace
         Result.failure("Archive processing error: #{e.message}")
       end
     end
     private
+    def download_with_retry(archive_url)
+      attempt = 1
+      last_error = nil
+      while attempt <= MAX_RETRY_ATTEMPTS
+        begin
+          log_info "Download attempt #{attempt}/#{MAX_RETRY_ATTEMPTS} for archive"
+          result = download_to_memory(archive_url)
+          if result.success?
+            log_info "Download successful on attempt #{attempt}"
+            return result
+          else
+            last_error = result.error
+            log_warn "Download attempt #{attempt} failed: #{last_error}"
+          end
+        rescue StandardError => e
+          last_error = e.message
+          log_error "Download attempt #{attempt} exception: #{last_error}"
+        end
+        if attempt < MAX_RETRY_ATTEMPTS
+          sleep_time = RETRY_DELAY_SECONDS * attempt
+          log_info "Waiting #{sleep_time} seconds before retry..."
+          sleep(sleep_time)
+        end
+        attempt += 1
+      end
+      Result.failure("Download failed after #{MAX_RETRY_ATTEMPTS} attempts. Last error: #{last_error}")
+    end
     def download_to_memory(url)
       begin
         uri = URI.parse(url)
         # Check if URI is valid HTTP/HTTPS
-        unless uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)
-          return Result.failure("Invalid URL: not HTTP/HTTPS")
-        end
+        return Result.failure('Invalid URL: not HTTP/HTTPS') unless uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)
       rescue URI::InvalidURIError => e
         return Result.failure("Invalid URL: #{e.message}")
       end
@@ -80,15 +117,45 @@ module OxTenderAbstract
         http = create_http_client(uri)
         request = Net::HTTP::Get.new(uri.request_uri)
-        request["User-Agent"] = "OxTenderAbstract/#{OxTenderAbstract::VERSION}"
-        request["individualPerson_token"] = OxTenderAbstract.configuration.token
+        request['User-Agent'] = "OxTenderAbstract/#{OxTenderAbstract::VERSION}"
+        request['individualPerson_token'] = OxTenderAbstract.configuration.token
-        log_debug "Downloading archive from: #{url}"
+        log_debug "Downloading archive from: #{url[0..100]}..."
         response = http.request(request)
+        # Enhanced error handling with response details
         unless response.is_a?(Net::HTTPSuccess)
-          return Result.failure("HTTP error: #{response.code} #{response.message}")
+          error_msg = "HTTP error: #{response.code} #{response.message}"
+          if response.body && !response.body.empty?
+            # Log first part of response body for debugging
+            body_preview = response.body[0..500]
+            log_error "Response body preview: #{body_preview}"
+            error_msg += ". Response: #{body_preview[0..100]}"
+          end
+          return Result.failure(error_msg)
+        end
+        # Check for download blocking message in successful response
+        if response.body&.include?('Скачивание архива по данной ссылке заблокировано')
+          if OxTenderAbstract.configuration.auto_wait_on_block
+            wait_time = OxTenderAbstract.configuration.block_wait_time
+            log_error "Archive download blocked. Auto-waiting for #{wait_time} seconds..."
+            # Показываем прогресс ожидания
+            show_wait_progress(wait_time)
+            log_info 'Wait completed, retrying download...'
+            # Рекурсивно повторяем попытку после ожидания
+            return download_to_memory(url)
+          else
+            log_error 'Archive download blocked for 10 minutes'
+            return Result.failure(
+              'Archive download blocked for 10 minutes',
+              error_type: :blocked,
+              retry_after: 600
+            )
+          end
         end
         content = response.body
@@ -98,78 +165,127 @@ module OxTenderAbstract
           return Result.failure("Archive too large: #{size} bytes (max: #{MAX_FILE_SIZE_BYTES})")
         end
-        log_debug "Downloaded archive: #{size} bytes"
+        return Result.failure('Empty archive downloaded') if size == 0
+        log_debug "Downloaded archive: #{size} bytes, content-type: #{response['content-type']}"
         Result.success({
-          content: content,
-          size: size,
-          content_type: response["content-type"]
-        })
+                         content: content,
+                         size: size,
+                         content_type: response['content-type']
+                       })
       rescue SocketError, Timeout::Error => e
         Result.failure("Network error: #{e.message}")
-      rescue => e
+      rescue StandardError => e
+        log_error "Download error details: #{e.class} - #{e.message}"
         Result.failure("Download error: #{e.message}")
       end
     end
     def create_http_client(uri)
       http = Net::HTTP.new(uri.host, uri.port)
-      http.use_ssl = uri.scheme == "https"
+      http.use_ssl = uri.scheme == 'https'
       http.verify_mode = OxTenderAbstract.configuration.ssl_verify ? OpenSSL::SSL::VERIFY_PEER : OpenSSL::SSL::VERIFY_NONE
       http.open_timeout = OxTenderAbstract.configuration.timeout_open
       http.read_timeout = OxTenderAbstract.configuration.timeout_read
+      # Add debug logging for HTTP client configuration
+      log_debug "HTTP client config: SSL=#{http.use_ssl?}, verify=#{http.verify_mode}, open_timeout=#{http.open_timeout}, read_timeout=#{http.read_timeout}"
       http
     end
     def decompress_gzip(gzip_content)
-      begin
-        log_debug "Decompressing GZIP archive"
-        gz = Zlib::GzipReader.new(StringIO.new(gzip_content))
-        decompressed_content = gz.read
-        gz.close
+      log_debug 'Decompressing GZIP archive'
-        Result.success({
-          content: decompressed_content,
-          compressed_size: gzip_content.bytesize,
-          decompressed_size: decompressed_content.bytesize
-        })
-      rescue Zlib::GzipFile::Error => e
-        Result.failure("GZIP decompression error: #{e.message}")
-      rescue => e
-        Result.failure("Decompression error: #{e.message}")
-      end
+      gz = Zlib::GzipReader.new(StringIO.new(gzip_content))
+      decompressed_content = gz.read
+      gz.close
+      log_debug "GZIP decompression: #{gzip_content.bytesize} -> #{decompressed_content.bytesize} bytes"
+      Result.success({
+                       content: decompressed_content,
+                       compressed_size: gzip_content.bytesize,
+                       decompressed_size: decompressed_content.bytesize
+                     })
+    rescue Zlib::GzipFile::Error => e
+      log_error "GZIP decompression error: #{e.message}"
+      Result.failure("GZIP decompression error: #{e.message}")
+    rescue StandardError => e
+      log_error "Decompression error: #{e.message}"
+      Result.failure("Decompression error: #{e.message}")
     end
     def extract_zip_from_memory(zip_content)
-      begin
-        log_debug "Extracting ZIP archive from memory"
-        files = {}
-        zip_io = StringIO.new(zip_content)
-        Zip::File.open_buffer(zip_io) do |zip_file|
-          zip_file.each do |entry|
-            next if entry.directory?
-            log_debug "Extracting file: #{entry.name} (#{entry.size} bytes)"
+      log_debug "Extracting ZIP archive from memory (#{zip_content.bytesize} bytes)"
+      files = {}
+      zip_io = StringIO.new(zip_content)
+      Zip::File.open_buffer(zip_io) do |zip_file|
+        zip_file.each do |entry|
+          next if entry.directory?
+          log_debug "Extracting file: #{entry.name} (#{entry.size} bytes)"
+          begin
+            content = entry.get_input_stream.read
             files[entry.name] = {
-              content: entry.get_input_stream.read,
+              content: content,
               size: entry.size,
               compressed_size: entry.compressed_size,
               crc: entry.crc
             }
+          rescue StandardError => e
+            log_error "Error extracting file #{entry.name}: #{e.message}"
+            # Continue with other files instead of failing completely
           end
         end
+      end
+      log_debug "Extracted #{files.size} files from ZIP archive"
+      files
+    rescue Zip::Error => e
+      log_error "ZIP extraction error: #{e.message}"
+      raise ArchiveError, "ZIP extraction error: #{e.message}"
+    rescue StandardError => e
+      log_error "Archive extraction error: #{e.message}"
+      log_error e.backtrace.first(3).join("\n") if e.backtrace
+      raise ArchiveError, "Archive extraction error: #{e.message}"
+    end
+    # Show wait progress during API block
+    def show_wait_progress(total_seconds)
+      return if total_seconds <= 0
+      log_info "Waiting #{total_seconds} seconds for API block to expire..."
+      # Показываем прогресс каждые 30 секунд для больших интервалов
+      if total_seconds > 60
+        intervals = [30, 60, 120, 180, 300].select { |i| i < total_seconds }
-        log_debug "Extracted #{files.size} files from ZIP archive"
-        files
-      rescue Zip::Error => e
-        raise ArchiveError, "ZIP extraction error: #{e.message}"
-      rescue => e
-        raise ArchiveError, "Archive extraction error: #{e.message}"
+        intervals.each do |interval|
+          sleep(interval)
+          remaining = total_seconds - interval
+          total_seconds = remaining
+          if remaining > 60
+            log_info "Still waiting... #{remaining} seconds remaining (#{(remaining / 60.0).round(1)} minutes)"
+          else
+            log_info "Still waiting... #{remaining} seconds remaining"
+          end
+        end
+        # Ждем оставшееся время
+        sleep(total_seconds) if total_seconds > 0
+      else
+        # Для коротких интервалов просто ждем
+        sleep(total_seconds)
       end
+      log_info 'Wait period completed!'
     end
   end
-end
+end

data/lib/oxtenderabstract/client.rb CHANGED

@@ -77,7 +77,7 @@ module OxTenderAbstract
     # Search tenders with full workflow: API -> Archive -> Parse
     def search_tenders(org_region:, exact_date:, subsystem_type: DocumentTypes::DEFAULT_SUBSYSTEM,
                        document_type: DocumentTypes::DEFAULT_DOCUMENT_TYPE)
-      log_info "Starting tender search for region #{org_region}, date #{exact_date}"
+      log_info "Starting tender search for region #{org_region}, date #{exact_date}, subsystem: #{subsystem_type}, type: #{document_type}"
       # Step 1: Get archive URLs from API
       api_result = get_docs_by_region(
@@ -94,44 +94,69 @@ module OxTenderAbstract
       log_info "Found #{archive_urls.size} archives to process"
-      # Step 2: Process each archive
+      # Step 2: Process each archive with error resilience
       all_tenders = []
       total_files = 0
+      processed_archives = 0
+      failed_archives = 0
       archive_urls.each_with_index do |archive_url, index|
         log_info "Processing archive #{index + 1}/#{archive_urls.size}"
-        archive_result = download_archive_data(archive_url)
-        next if archive_result.failure?
+        begin
+          archive_result = download_archive_data(archive_url)
-        files = archive_result.data[:files]
-        total_files += files.size
+          if archive_result.failure?
+            log_error "Failed to download archive #{index + 1}: #{archive_result.error}"
+            failed_archives += 1
+            next
+          end
-        # Step 3: Parse XML files from archive
-        xml_files = files.select { |name, _| name.downcase.end_with?('.xml') }
+          processed_archives += 1
+          files = archive_result.data[:files]
+          total_files += files.size
-        xml_files.each do |file_name, file_data|
-          parse_result = parse_xml_document(file_data[:content])
-          next if parse_result.failure?
-          next unless parse_result.data[:document_type] == :tender
+          # Step 3: Parse XML files from archive
+          xml_files = files.select { |name, _| name.downcase.end_with?('.xml') }
+          log_debug "Found #{xml_files.size} XML files in archive #{index + 1}"
-          tender_data = parse_result.data[:content]
-          next if tender_data[:reestr_number].nil? || tender_data[:reestr_number].empty?
+          xml_files.each do |file_name, file_data|
+            parse_result = parse_xml_document(file_data[:content])
-          # Add metadata
-          tender_data[:source_file] = file_name
-          tender_data[:archive_url] = archive_url
-          tender_data[:processed_at] = Time.now
+            if parse_result.failure?
+              log_debug "Failed to parse #{file_name}: #{parse_result.error}"
+              next
+            end
-          all_tenders << tender_data
+            next unless parse_result.data[:document_type] == :tender
+            tender_data = parse_result.data[:content]
+            next if tender_data[:reestr_number].nil? || tender_data[:reestr_number].empty?
+            # Add metadata
+            tender_data[:source_file] = file_name
+            tender_data[:archive_url] = archive_url
+            tender_data[:processed_at] = Time.now
+            all_tenders << tender_data
+          rescue StandardError => e
+            log_error "Error processing file #{file_name}: #{e.message}"
+            # Continue with other files
+          end
+        rescue StandardError => e
+          log_error "Critical error processing archive #{index + 1}: #{e.message}"
+          failed_archives += 1
+          # Continue with other archives
         end
       end
-      log_info "Search completed. Found #{all_tenders.size} tenders in #{total_files} files"
+      log_info "Search completed. Processed: #{processed_archives}/#{archive_urls.size} archives, Failed: #{failed_archives}, Found #{all_tenders.size} tenders in #{total_files} files"
       Result.success({
                        tenders: all_tenders,
                        total_archives: archive_urls.size,
+                       processed_archives: processed_archives,
+                       failed_archives: failed_archives,
                        total_files: total_files,
                        processed_at: Time.now
                      })
@@ -211,6 +236,131 @@ module OxTenderAbstract
                      })
     end
+    # Search tenders with automatic resume capability
+    # Позволяет продолжить загрузку с места паузы при блокировках API
+    def search_tenders_with_resume(org_region:, exact_date:, subsystem_type: DocumentTypes::DEFAULT_SUBSYSTEM,
+                                   document_type: DocumentTypes::DEFAULT_DOCUMENT_TYPE,
+                                   start_from_archive: 0, resume_state: nil)
+      log_info "Starting tender search with resume capability for region #{org_region}, date #{exact_date}"
+      log_info "Starting from archive #{start_from_archive}" if start_from_archive > 0
+      # Восстанавливаем состояние если есть
+      if resume_state
+        log_info "Resuming from previous state: #{resume_state[:processed_archives]} archives processed"
+        all_tenders = resume_state[:tenders] || []
+        total_files = resume_state[:total_files] || 0
+        processed_archives = resume_state[:processed_archives] || 0
+        failed_archives = resume_state[:failed_archives] || 0
+        archive_urls = resume_state[:archive_urls]
+      else
+        # Step 1: Get archive URLs from API
+        api_result = get_docs_by_region(
+          org_region: org_region,
+          subsystem_type: subsystem_type,
+          document_type: document_type,
+          exact_date: exact_date
+        )
+        return api_result if api_result.failure?
+        archive_urls = api_result.data[:archive_urls]
+        return Result.success({ tenders: [], total_archives: 0, total_files: 0 }) if archive_urls.empty?
+        all_tenders = []
+        total_files = 0
+        processed_archives = 0
+        failed_archives = 0
+      end
+      log_info "Found #{archive_urls.size} archives to process (starting from #{start_from_archive})"
+      # Step 2: Process archives starting from specified position
+      (start_from_archive...archive_urls.size).each do |index|
+        archive_url = archive_urls[index]
+        log_info "Processing archive #{index + 1}/#{archive_urls.size}"
+        begin
+          archive_result = download_archive_data(archive_url)
+          if archive_result.failure?
+            # Проверяем, была ли блокировка с автоматическим ожиданием
+            if archive_result.metadata[:error_type] == :blocked &&
+               !OxTenderAbstract.configuration.auto_wait_on_block
+              # Возвращаем состояние для возможности продолжения
+              resume_state = {
+                tenders: all_tenders,
+                total_files: total_files,
+                processed_archives: processed_archives,
+                failed_archives: failed_archives,
+                archive_urls: archive_urls,
+                next_archive_index: index
+              }
+              return Result.failure(
+                "Archive download blocked, can resume from archive #{index + 1}",
+                error_type: :blocked,
+                retry_after: 600,
+                resume_state: resume_state
+              )
+            else
+              log_error "Failed to download archive #{index + 1}: #{archive_result.error}"
+              failed_archives += 1
+              next
+            end
+          end
+          processed_archives += 1
+          files = archive_result.data[:files]
+          total_files += files.size
+          # Step 3: Parse XML files from archive
+          xml_files = files.select { |name, _| name.downcase.end_with?('.xml') }
+          log_debug "Found #{xml_files.size} XML files in archive #{index + 1}"
+          xml_files.each do |file_name, file_data|
+            parse_result = parse_xml_document(file_data[:content])
+            if parse_result.failure?
+              log_debug "Failed to parse #{file_name}: #{parse_result.error}"
+              next
+            end
+            next unless parse_result.data[:document_type] == :tender
+            tender_data = parse_result.data[:content]
+            next if tender_data[:reestr_number].nil? || tender_data[:reestr_number].empty?
+            # Add metadata
+            tender_data[:source_file] = file_name
+            tender_data[:archive_url] = archive_url
+            tender_data[:processed_at] = Time.now
+            tender_data[:archive_index] = index
+            all_tenders << tender_data
+          rescue StandardError => e
+            log_error "Error processing file #{file_name}: #{e.message}"
+            # Continue with other files
+          end
+        rescue StandardError => e
+          log_error "Critical error processing archive #{index + 1}: #{e.message}"
+          failed_archives += 1
+          # Continue with other archives
+        end
+      end
+      log_info "Search completed. Processed: #{processed_archives}/#{archive_urls.size} archives, Failed: #{failed_archives}, Found #{all_tenders.size} tenders in #{total_files} files"
+      Result.success({
+                       tenders: all_tenders,
+                       total_archives: archive_urls.size,
+                       processed_archives: processed_archives,
+                       failed_archives: failed_archives,
+                       total_files: total_files,
+                       processed_at: Time.now,
+                       completed: true
+                     })
+    end
     private
     def validate_token!

data/lib/oxtenderabstract/configuration.rb CHANGED

@@ -5,7 +5,8 @@ require 'logger'
 module OxTenderAbstract
   # Configuration for the library
   class Configuration
-    attr_accessor :token, :timeout_open, :timeout_read, :ssl_verify
+    attr_accessor :token, :timeout_open, :timeout_read, :ssl_verify,
+                  :auto_wait_on_block, :block_wait_time, :max_wait_time
     attr_writer :wsdl_url, :logger
     def initialize
@@ -15,6 +16,9 @@ module OxTenderAbstract
       @ssl_verify = false
       @wsdl_url = nil  # Will be set later
       @logger = nil    # Will be set later
+      @auto_wait_on_block = true  # Автоматически ждать при блокировке
+      @block_wait_time = 610      # Время ожидания при блокировке (10 мин + 10 сек)
+      @max_wait_time = 900        # Максимальное время ожидания (15 мин)
     end
     def wsdl_url