llamaparserb 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -5
- data/README.md +9 -1
- data/lib/llamaparserb/version.rb +1 -1
- data/lib/llamaparserb.rb +33 -4
- metadata +1 -1
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 9c49dc1624b1b955cad8032696308fc81357533113cf429f53fe063d5db0cab2
         | 
| 4 | 
            +
              data.tar.gz: 2eee40a9054fe05d02a094828bb47a6cc3a5b65ca6fcb5864b9648cff5f0b418
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 50ed45e0d813d776b79fce2e87593cdc88063158045c62ba76e1bc0e353fb1b27b2b03b87dc4253443b4b2a355fec179708d04e1920dc0f47730c271e3ff81cc
         | 
| 7 | 
            +
              data.tar.gz: 9f86c50b3f5c4bd987c9bab987c5b139ce1dd998577d04ea3fba53d64b430d3cdcc0af3d880d73ebea1914341bc185fb78469f78d9521098f06510c7024b8746
         | 
    
        data/CHANGELOG.md
    CHANGED
    
    | @@ -6,14 +6,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | |
| 6 6 |  | 
| 7 7 | 
             
            ## [Unreleased]
         | 
| 8 8 |  | 
| 9 | 
            +
            ## [0.3.0] - 2024-11-28
         | 
| 10 | 
            +
            ### Added
         | 
| 11 | 
            +
            - Add support for parsing files from URLs
         | 
| 12 | 
            +
             | 
| 9 13 | 
             
            ## [0.2.3] - 2024-11-28
         | 
| 10 14 | 
             
            ### Added
         | 
| 11 15 | 
             
            - Add support for all supported optional llamaparse parameters to `parse_file`
         | 
| 12 16 |  | 
| 17 | 
            +
            [0.2.3]: https://github.com/horizing/llamaparserb/compare/v0.2.2...v0.2.3
         | 
| 18 | 
            +
             | 
| 13 19 | 
             
            ## [0.2.2] - 2024-11-28
         | 
| 14 20 | 
             
            ### Fixed
         | 
| 15 21 | 
             
            - Fix issue with handling file path
         | 
| 16 22 |  | 
| 23 | 
            +
            [0.2.2]: https://github.com/horizing/llamaparserb/releases/tag/v0.2.2
         | 
| 24 | 
            +
             | 
| 17 25 | 
             
            ## [0.2.1] - 2024-11-28
         | 
| 18 26 | 
             
            ### Fixed
         | 
| 19 27 | 
             
            - Fix parse_file to handle files that are not on the local filesystem
         | 
| @@ -29,8 +37,4 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | |
| 29 37 |  | 
| 30 38 | 
             
            ## [0.1.0] - 2024-11-27
         | 
| 31 39 | 
             
            ### Added
         | 
| 32 | 
            -
            - Initial release
         | 
| 33 | 
            -
             | 
| 34 | 
            -
            [Unreleased]: https://github.com/horizing/llamaparserb/compare/v0.1.0...HEAD
         | 
| 35 | 
            -
            [0.1.1]: https://github.com/horizing/llamaparserb/releases/tag/v0.1.1...v0.1.0
         | 
| 36 | 
            -
            [0.1.0]: https://github.com/horizing/llamaparserb/releases/tag/v0.1.0
         | 
| 40 | 
            +
            - Initial release
         | 
    
        data/README.md
    CHANGED
    
    | @@ -43,11 +43,14 @@ text = client.parse_file(file_content, 'pdf') | |
| 43 43 | 
             
            # Parse a file to markdown
         | 
| 44 44 | 
             
            client = Llamaparserb::Client.new(ENV['LLAMA_CLOUD_API_KEY'], result_type: "markdown")
         | 
| 45 45 | 
             
            markdown = client.parse_file('path/to/document.pdf')
         | 
| 46 | 
            +
             | 
| 47 | 
            +
            # Parse a file from a URL
         | 
| 48 | 
            +
            markdown = client.parse_file('https://example.com/document.pdf')
         | 
| 46 49 | 
             
            ```
         | 
| 47 50 |  | 
| 48 51 | 
             
            ### File Input Options
         | 
| 49 52 |  | 
| 50 | 
            -
            The `parse_file` method accepts  | 
| 53 | 
            +
            The `parse_file` method accepts three types of inputs:
         | 
| 51 54 |  | 
| 52 55 | 
             
            1. File path (String):
         | 
| 53 56 | 
             
            ```ruby
         | 
| @@ -69,6 +72,11 @@ temp_file = Tempfile.new(['document', '.pdf']) | |
| 69 72 | 
             
            client.parse_file(temp_file, 'pdf')
         | 
| 70 73 | 
             
            ```
         | 
| 71 74 |  | 
| 75 | 
            +
            3. URL (String):
         | 
| 76 | 
            +
            ```ruby
         | 
| 77 | 
            +
            client.parse_file('https://example.com/document.pdf')
         | 
| 78 | 
            +
            ```
         | 
| 79 | 
            +
             | 
| 72 80 | 
             
            ### Advanced Options
         | 
| 73 81 |  | 
| 74 82 | 
             
            ```ruby
         | 
    
        data/lib/llamaparserb/version.rb
    CHANGED
    
    
    
        data/lib/llamaparserb.rb
    CHANGED
    
    | @@ -51,6 +51,9 @@ module Llamaparserb | |
| 51 51 | 
             
                    elsif File.exist?(file_input)
         | 
| 52 52 | 
             
                      job_id = create_job_from_path(file_input)
         | 
| 53 53 | 
             
                      log "Started parsing file under job_id #{job_id}", :info
         | 
| 54 | 
            +
                    elsif URI::DEFAULT_PARSER.make_regexp.match?(file_input)
         | 
| 55 | 
            +
                      job_id = create_job_from_url(file_input)
         | 
| 56 | 
            +
                      log "Started parsing URL under job_id #{job_id}", :info
         | 
| 54 57 | 
             
                    else
         | 
| 55 58 | 
             
                      raise Error, "file_type parameter is required for binary string input"
         | 
| 56 59 | 
             
                    end
         | 
| @@ -197,7 +200,7 @@ module Llamaparserb | |
| 197 200 | 
             
                def build_connection
         | 
| 198 201 | 
             
                  Faraday.new(url: base_url) do |f|
         | 
| 199 202 | 
             
                    f.request :multipart
         | 
| 200 | 
            -
                    f.request : | 
| 203 | 
            +
                    f.request :url_encoded
         | 
| 201 204 | 
             
                    f.response :json
         | 
| 202 205 | 
             
                    f.response :raise_error
         | 
| 203 206 | 
             
                    f.adapter Faraday.default_adapter
         | 
| @@ -234,7 +237,13 @@ module Llamaparserb | |
| 234 237 | 
             
                    temp_file,
         | 
| 235 238 | 
             
                    detect_content_type(temp_file.path)
         | 
| 236 239 | 
             
                  )
         | 
| 237 | 
            -
             | 
| 240 | 
            +
             | 
| 241 | 
            +
                  response = @connection.post("upload") do |req|
         | 
| 242 | 
            +
                    req.headers["Authorization"] = "Bearer #{api_key}"
         | 
| 243 | 
            +
                    req.body = {file: file}
         | 
| 244 | 
            +
                  end
         | 
| 245 | 
            +
             | 
| 246 | 
            +
                  response.body["id"]
         | 
| 238 247 | 
             
                ensure
         | 
| 239 248 | 
             
                  temp_file&.close
         | 
| 240 249 | 
             
                  temp_file&.unlink
         | 
| @@ -249,9 +258,8 @@ module Llamaparserb | |
| 249 258 | 
             
                  response.body["id"]
         | 
| 250 259 | 
             
                end
         | 
| 251 260 |  | 
| 252 | 
            -
                def upload_params(file)
         | 
| 261 | 
            +
                def upload_params(file = nil, url = nil)
         | 
| 253 262 | 
             
                  params = {
         | 
| 254 | 
            -
                    file: file,
         | 
| 255 263 | 
             
                    language: @options[:language].to_s,
         | 
| 256 264 | 
             
                    parsing_instruction: @options[:parsing_instruction],
         | 
| 257 265 | 
             
                    invalidate_cache: @options[:invalidate_cache],
         | 
| @@ -286,6 +294,12 @@ module Llamaparserb | |
| 286 294 | 
             
                  params[:azure_openai_key] = @options[:azure_openai_key] if @options[:azure_openai_key]
         | 
| 287 295 | 
             
                  params[:http_proxy] = @options[:http_proxy] if @options[:http_proxy]
         | 
| 288 296 |  | 
| 297 | 
            +
                  if url
         | 
| 298 | 
            +
                    params[:input_url] = url.to_s
         | 
| 299 | 
            +
                  elsif file
         | 
| 300 | 
            +
                    params[:file] = file
         | 
| 301 | 
            +
                  end
         | 
| 302 | 
            +
             | 
| 289 303 | 
             
                  params.compact
         | 
| 290 304 | 
             
                end
         | 
| 291 305 |  | 
| @@ -335,5 +349,20 @@ module Llamaparserb | |
| 335 349 | 
             
                    raise Error, "Unsupported file type: #{extension}. Supported types: #{SUPPORTED_FILE_TYPES.join(", ")}"
         | 
| 336 350 | 
             
                  end
         | 
| 337 351 | 
             
                end
         | 
| 352 | 
            +
             | 
| 353 | 
            +
                def create_job_from_url(url)
         | 
| 354 | 
            +
                  log "Creating job from URL: #{url}", :debug
         | 
| 355 | 
            +
             | 
| 356 | 
            +
                  response = @connection.post("upload") do |req|
         | 
| 357 | 
            +
                    req.headers["Authorization"] = "Bearer #{api_key}"
         | 
| 358 | 
            +
                    req.headers["Accept"] = "application/json"
         | 
| 359 | 
            +
                    # Create a simple form data request
         | 
| 360 | 
            +
                    req.options.timeout = 30  # Optional: add timeout
         | 
| 361 | 
            +
                    req.body = {"input_url" => url.to_s}
         | 
| 362 | 
            +
                  end
         | 
| 363 | 
            +
             | 
| 364 | 
            +
                  log "Response: #{response.body.inspect}", :debug
         | 
| 365 | 
            +
                  response.body["id"]
         | 
| 366 | 
            +
                end
         | 
| 338 367 | 
             
              end
         | 
| 339 368 | 
             
            end
         |