llamaparserb 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/CHANGELOG.md +12 -2
 - data/LICENSE.txt +1 -1
 - data/README.md +31 -2
 - data/lib/llamaparserb/version.rb +1 -1
 - data/lib/llamaparserb.rb +43 -9
 - metadata +9 -8
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA256:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 14e4e5deea50bc3f5cb0e32c3c3029f6b26fda4cacf91be0f96e9f677f92d1e5
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: e3e6cb569456d1c649be22ad3e5af65e30484fce50582d7493f5e22b2ac03f61
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 72121ad4b70f95ddd2bfe23f129d5c9ee634543543ba43e4faa437da67bf3cbdf8bc1ea828aeb58d43073bed46c28d4010a6865b5bc006c32ccfc30a099d1d65
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 5bea9e367b71d38fe8f6a1a80be0b4fd190e3e56b780e369d33c3319dfd91b483283f4b43ab8fc484468ab611c5e3efc639d8302ba611ccfb71258c459fdb783
         
     | 
    
        data/CHANGELOG.md
    CHANGED
    
    | 
         @@ -6,9 +6,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 
     | 
|
| 
       6 
6 
     | 
    
         | 
| 
       7 
7 
     | 
    
         
             
            ## [Unreleased]
         
     | 
| 
       8 
8 
     | 
    
         | 
| 
      
 9 
     | 
    
         
            +
            ## [0.2.0] - 2024-11-28
         
     | 
| 
      
 10 
     | 
    
         
            +
            ### Changed
         
     | 
| 
      
 11 
     | 
    
         
            +
            - Allow passing in a string or an IO object to `parse_file`
         
     | 
| 
      
 12 
     | 
    
         
            +
            - Add support for file type parameter to `parse_file`
         
     | 
| 
      
 13 
     | 
    
         
            +
             
     | 
| 
      
 14 
     | 
    
         
            +
            ## [0.1.1] - 2024-11-28
         
     | 
| 
      
 15 
     | 
    
         
            +
            ### Changed
         
     | 
| 
      
 16 
     | 
    
         
            +
            - Move gem ownership to Horizing
         
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
       9 
18 
     | 
    
         
             
            ## [0.1.0] - 2024-11-27
         
     | 
| 
       10 
19 
     | 
    
         
             
            ### Added
         
     | 
| 
       11 
20 
     | 
    
         
             
            - Initial release
         
     | 
| 
       12 
21 
     | 
    
         | 
| 
       13 
     | 
    
         
            -
            [Unreleased]: https://github.com/ 
     | 
| 
       14 
     | 
    
         
            -
            [0.1. 
     | 
| 
      
 22 
     | 
    
         
            +
            [Unreleased]: https://github.com/horizing/llamaparserb/compare/v0.1.0...HEAD
         
     | 
| 
      
 23 
     | 
    
         
            +
            [0.1.1]: https://github.com/horizing/llamaparserb/releases/tag/v0.1.1...v0.1.0
         
     | 
| 
      
 24 
     | 
    
         
            +
            [0.1.0]: https://github.com/horizing/llamaparserb/releases/tag/v0.1.0
         
     | 
    
        data/LICENSE.txt
    CHANGED
    
    
    
        data/README.md
    CHANGED
    
    | 
         @@ -32,14 +32,43 @@ require 'llamaparserb' 
     | 
|
| 
       32 
32 
     | 
    
         
             
            # Initialize client with API key
         
     | 
| 
       33 
33 
     | 
    
         
             
            client = Llamaparserb::Client.new(ENV['LLAMA_CLOUD_API_KEY'])
         
     | 
| 
       34 
34 
     | 
    
         | 
| 
       35 
     | 
    
         
            -
            # Parse a file to text  
     | 
| 
      
 35 
     | 
    
         
            +
            # Parse a file from disk (to text by default)
         
     | 
| 
       36 
36 
     | 
    
         
             
            text = client.parse_file('path/to/document.pdf')
         
     | 
| 
       37 
37 
     | 
    
         | 
| 
      
 38 
     | 
    
         
            +
            # Parse an in-memory file (requires file type)
         
     | 
| 
      
 39 
     | 
    
         
            +
            require 'open-uri'
         
     | 
| 
      
 40 
     | 
    
         
            +
            file_content = URI.open('https://example.com/document.pdf')
         
     | 
| 
      
 41 
     | 
    
         
            +
            text = client.parse_file(file_content, 'pdf')
         
     | 
| 
      
 42 
     | 
    
         
            +
             
     | 
| 
       38 
43 
     | 
    
         
             
            # Parse a file to markdown
         
     | 
| 
       39 
44 
     | 
    
         
             
            client = Llamaparserb::Client.new(ENV['LLAMA_CLOUD_API_KEY'], result_type: "markdown")
         
     | 
| 
       40 
45 
     | 
    
         
             
            markdown = client.parse_file('path/to/document.pdf')
         
     | 
| 
       41 
46 
     | 
    
         
             
            ```
         
     | 
| 
       42 
47 
     | 
    
         | 
| 
      
 48 
     | 
    
         
            +
            ### File Input Options
         
     | 
| 
      
 49 
     | 
    
         
            +
             
     | 
| 
      
 50 
     | 
    
         
            +
            The `parse_file` method accepts two types of inputs:
         
     | 
| 
      
 51 
     | 
    
         
            +
             
     | 
| 
      
 52 
     | 
    
         
            +
            1. File path (String):
         
     | 
| 
      
 53 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 54 
     | 
    
         
            +
            client.parse_file('path/to/document.pdf')
         
     | 
| 
      
 55 
     | 
    
         
            +
            ```
         
     | 
| 
      
 56 
     | 
    
         
            +
             
     | 
| 
      
 57 
     | 
    
         
            +
            2. IO object (requires file type parameter):
         
     | 
| 
      
 58 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 59 
     | 
    
         
            +
            # From a URL
         
     | 
| 
      
 60 
     | 
    
         
            +
            file_content = URI.open('https://example.com/document.pdf')
         
     | 
| 
      
 61 
     | 
    
         
            +
            client.parse_file(file_content, 'pdf')
         
     | 
| 
      
 62 
     | 
    
         
            +
             
     | 
| 
      
 63 
     | 
    
         
            +
            # From memory
         
     | 
| 
      
 64 
     | 
    
         
            +
            io = StringIO.new(file_content)
         
     | 
| 
      
 65 
     | 
    
         
            +
            client.parse_file(io, 'pdf')
         
     | 
| 
      
 66 
     | 
    
         
            +
             
     | 
| 
      
 67 
     | 
    
         
            +
            # From a Tempfile
         
     | 
| 
      
 68 
     | 
    
         
            +
            temp_file = Tempfile.new(['document', '.pdf'])
         
     | 
| 
      
 69 
     | 
    
         
            +
            client.parse_file(temp_file, 'pdf')
         
     | 
| 
      
 70 
     | 
    
         
            +
            ```
         
     | 
| 
      
 71 
     | 
    
         
            +
             
     | 
| 
       43 
72 
     | 
    
         
             
            ### Advanced Options
         
     | 
| 
       44 
73 
     | 
    
         | 
| 
       45 
74 
     | 
    
         
             
            ```ruby
         
     | 
| 
         @@ -107,7 +136,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To 
     | 
|
| 
       107 
136 
     | 
    
         | 
| 
       108 
137 
     | 
    
         
             
            ## Contributing
         
     | 
| 
       109 
138 
     | 
    
         | 
| 
       110 
     | 
    
         
            -
            Bug reports and pull requests are welcome on GitHub at https://github.com/ 
     | 
| 
      
 139 
     | 
    
         
            +
            Bug reports and pull requests are welcome on GitHub at https://github.com/horizing/llamaparserb.
         
     | 
| 
       111 
140 
     | 
    
         | 
| 
       112 
141 
     | 
    
         
             
            ## License
         
     | 
| 
       113 
142 
     | 
    
         | 
    
        data/lib/llamaparserb/version.rb
    CHANGED
    
    
    
        data/lib/llamaparserb.rb
    CHANGED
    
    | 
         @@ -8,6 +8,7 @@ require "mime/types" 
     | 
|
| 
       8 
8 
     | 
    
         
             
            require "uri"
         
     | 
| 
       9 
9 
     | 
    
         
             
            require "async"
         
     | 
| 
       10 
10 
     | 
    
         
             
            require "logger"
         
     | 
| 
      
 11 
     | 
    
         
            +
            require "tempfile"
         
     | 
| 
       11 
12 
     | 
    
         | 
| 
       12 
13 
     | 
    
         
             
            module Llamaparserb
         
     | 
| 
       13 
14 
     | 
    
         
             
              class Error < StandardError; end
         
     | 
| 
         @@ -41,17 +42,27 @@ module Llamaparserb 
     | 
|
| 
       41 
42 
     | 
    
         
             
                  @connection = build_connection
         
     | 
| 
       42 
43 
     | 
    
         
             
                end
         
     | 
| 
       43 
44 
     | 
    
         | 
| 
       44 
     | 
    
         
            -
                def parse_file( 
     | 
| 
       45 
     | 
    
         
            -
                   
     | 
| 
       46 
     | 
    
         
            -
                   
     | 
| 
      
 45 
     | 
    
         
            +
                def parse_file(file_input, file_type = nil)
         
     | 
| 
      
 46 
     | 
    
         
            +
                  case file_input
         
     | 
| 
      
 47 
     | 
    
         
            +
                  when String
         
     | 
| 
      
 48 
     | 
    
         
            +
                    # Treat as file path
         
     | 
| 
      
 49 
     | 
    
         
            +
                    job_id = create_job_from_path(file_input)
         
     | 
| 
      
 50 
     | 
    
         
            +
                    log "Started parsing file under job_id #{job_id}", :info
         
     | 
| 
      
 51 
     | 
    
         
            +
                  when IO, StringIO, Tempfile
         
     | 
| 
      
 52 
     | 
    
         
            +
                    # Treat as file object
         
     | 
| 
      
 53 
     | 
    
         
            +
                    raise Error, "file_type parameter is required for IO objects" unless file_type
         
     | 
| 
      
 54 
     | 
    
         
            +
                    job_id = create_job_from_io(file_input, file_type)
         
     | 
| 
      
 55 
     | 
    
         
            +
                    log "Started parsing in-memory file under job_id #{job_id}", :info
         
     | 
| 
      
 56 
     | 
    
         
            +
                  else
         
     | 
| 
      
 57 
     | 
    
         
            +
                    raise Error, "Invalid input type. Expected String (file path) or IO object, got #{file_input.class}"
         
     | 
| 
      
 58 
     | 
    
         
            +
                  end
         
     | 
| 
       47 
59 
     | 
    
         | 
| 
       48 
60 
     | 
    
         
             
                  wait_for_completion(job_id)
         
     | 
| 
       49 
     | 
    
         
            -
             
     | 
| 
       50 
61 
     | 
    
         
             
                  result = get_result(job_id)
         
     | 
| 
       51 
62 
     | 
    
         
             
                  log "Successfully retrieved result", :info
         
     | 
| 
       52 
63 
     | 
    
         
             
                  result
         
     | 
| 
       53 
64 
     | 
    
         
             
                rescue => e
         
     | 
| 
       54 
     | 
    
         
            -
                  handle_error(e,  
     | 
| 
      
 65 
     | 
    
         
            +
                  handle_error(e, file_input)
         
     | 
| 
       55 
66 
     | 
    
         
             
                end
         
     | 
| 
       56 
67 
     | 
    
         | 
| 
       57 
68 
     | 
    
         
             
                private
         
     | 
| 
         @@ -144,9 +155,9 @@ module Llamaparserb 
     | 
|
| 
       144 
155 
     | 
    
         
             
                  end
         
     | 
| 
       145 
156 
     | 
    
         
             
                end
         
     | 
| 
       146 
157 
     | 
    
         | 
| 
       147 
     | 
    
         
            -
                def handle_error(error,  
     | 
| 
      
 158 
     | 
    
         
            +
                def handle_error(error, file_input)
         
     | 
| 
       148 
159 
     | 
    
         
             
                  if @options[:ignore_errors]
         
     | 
| 
       149 
     | 
    
         
            -
                    log "Error while parsing file '#{ 
     | 
| 
      
 160 
     | 
    
         
            +
                    log "Error while parsing file '#{file_input}'", :error
         
     | 
| 
       150 
161 
     | 
    
         
             
                    nil
         
     | 
| 
       151 
162 
     | 
    
         
             
                  else
         
     | 
| 
       152 
163 
     | 
    
         
             
                    raise error
         
     | 
| 
         @@ -163,14 +174,37 @@ module Llamaparserb 
     | 
|
| 
       163 
174 
     | 
    
         
             
                  end
         
     | 
| 
       164 
175 
     | 
    
         
             
                end
         
     | 
| 
       165 
176 
     | 
    
         | 
| 
       166 
     | 
    
         
            -
                def  
     | 
| 
      
 177 
     | 
    
         
            +
                def create_job_from_path(file_path)
         
     | 
| 
       167 
178 
     | 
    
         
             
                  validate_file_type!(file_path)
         
     | 
| 
       168 
     | 
    
         
            -
             
     | 
| 
       169 
179 
     | 
    
         
             
                  file = Faraday::Multipart::FilePart.new(
         
     | 
| 
       170 
180 
     | 
    
         
             
                    file_path,
         
     | 
| 
       171 
181 
     | 
    
         
             
                    detect_content_type(file_path)
         
     | 
| 
       172 
182 
     | 
    
         
             
                  )
         
     | 
| 
      
 183 
     | 
    
         
            +
                  create_job(file)
         
     | 
| 
      
 184 
     | 
    
         
            +
                end
         
     | 
| 
      
 185 
     | 
    
         
            +
             
     | 
| 
      
 186 
     | 
    
         
            +
                def create_job_from_io(io, file_type)
         
     | 
| 
      
 187 
     | 
    
         
            +
                  # Ensure file_type starts with a dot
         
     | 
| 
      
 188 
     | 
    
         
            +
                  file_type = ".#{file_type}" unless file_type.start_with?(".")
         
     | 
| 
      
 189 
     | 
    
         
            +
                  validate_file_type!(file_type)
         
     | 
| 
      
 190 
     | 
    
         
            +
             
     | 
| 
      
 191 
     | 
    
         
            +
                  temp_file = Tempfile.new(["upload", file_type])
         
     | 
| 
      
 192 
     | 
    
         
            +
                  temp_file.binmode
         
     | 
| 
      
 193 
     | 
    
         
            +
                  io.rewind
         
     | 
| 
      
 194 
     | 
    
         
            +
                  temp_file.write(io.read)
         
     | 
| 
      
 195 
     | 
    
         
            +
                  temp_file.rewind
         
     | 
| 
      
 196 
     | 
    
         
            +
             
     | 
| 
      
 197 
     | 
    
         
            +
                  file = Faraday::Multipart::FilePart.new(
         
     | 
| 
      
 198 
     | 
    
         
            +
                    temp_file,
         
     | 
| 
      
 199 
     | 
    
         
            +
                    detect_content_type(temp_file.path)
         
     | 
| 
      
 200 
     | 
    
         
            +
                  )
         
     | 
| 
      
 201 
     | 
    
         
            +
                  create_job(file)
         
     | 
| 
      
 202 
     | 
    
         
            +
                ensure
         
     | 
| 
      
 203 
     | 
    
         
            +
                  temp_file&.close
         
     | 
| 
      
 204 
     | 
    
         
            +
                  temp_file&.unlink
         
     | 
| 
      
 205 
     | 
    
         
            +
                end
         
     | 
| 
       173 
206 
     | 
    
         | 
| 
      
 207 
     | 
    
         
            +
                def create_job(file)
         
     | 
| 
       174 
208 
     | 
    
         
             
                  response = @connection.post("upload") do |req|
         
     | 
| 
       175 
209 
     | 
    
         
             
                    req.headers["Authorization"] = "Bearer #{api_key}"
         
     | 
| 
       176 
210 
     | 
    
         
             
                    req.body = upload_params(file)
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,14 +1,14 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: llamaparserb
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 0. 
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.2.0
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Heidar Bernhardsson
         
     | 
| 
       8 
8 
     | 
    
         
             
            autorequire:
         
     | 
| 
       9 
9 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       10 
10 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       11 
     | 
    
         
            -
            date: 2024-11- 
     | 
| 
      
 11 
     | 
    
         
            +
            date: 2024-11-28 00:00:00.000000000 Z
         
     | 
| 
       12 
12 
     | 
    
         
             
            dependencies:
         
     | 
| 
       13 
13 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       14 
14 
     | 
    
         
             
              name: faraday
         
     | 
| 
         @@ -66,9 +66,10 @@ dependencies: 
     | 
|
| 
       66 
66 
     | 
    
         
             
                - - "~>"
         
     | 
| 
       67 
67 
     | 
    
         
             
                  - !ruby/object:Gem::Version
         
     | 
| 
       68 
68 
     | 
    
         
             
                    version: '2.0'
         
     | 
| 
       69 
     | 
    
         
            -
            description: A llamaparse client for Ruby.
         
     | 
| 
      
 69 
     | 
    
         
            +
            description: A llamaparse client for Ruby. Loosely based on the Python version from
         
     | 
| 
      
 70 
     | 
    
         
            +
              llamaparse.
         
     | 
| 
       70 
71 
     | 
    
         
             
            email:
         
     | 
| 
       71 
     | 
    
         
            -
            - heidar@ 
     | 
| 
      
 72 
     | 
    
         
            +
            - heidar@horizing.io
         
     | 
| 
       72 
73 
     | 
    
         
             
            executables: []
         
     | 
| 
       73 
74 
     | 
    
         
             
            extensions: []
         
     | 
| 
       74 
75 
     | 
    
         
             
            extra_rdoc_files: []
         
     | 
| 
         @@ -78,14 +79,14 @@ files: 
     | 
|
| 
       78 
79 
     | 
    
         
             
            - README.md
         
     | 
| 
       79 
80 
     | 
    
         
             
            - lib/llamaparserb.rb
         
     | 
| 
       80 
81 
     | 
    
         
             
            - lib/llamaparserb/version.rb
         
     | 
| 
       81 
     | 
    
         
            -
            homepage: https://github.com/ 
     | 
| 
      
 82 
     | 
    
         
            +
            homepage: https://github.com/horizing/llamaparserb
         
     | 
| 
       82 
83 
     | 
    
         
             
            licenses:
         
     | 
| 
       83 
84 
     | 
    
         
             
            - MIT
         
     | 
| 
       84 
85 
     | 
    
         
             
            metadata:
         
     | 
| 
       85 
86 
     | 
    
         
             
              allowed_push_host: https://rubygems.org
         
     | 
| 
       86 
     | 
    
         
            -
              homepage_uri: https://github.com/ 
     | 
| 
       87 
     | 
    
         
            -
              source_code_uri: https://github.com/ 
     | 
| 
       88 
     | 
    
         
            -
              changelog_uri: https://github.com/ 
     | 
| 
      
 87 
     | 
    
         
            +
              homepage_uri: https://github.com/horizing/llamaparserb
         
     | 
| 
      
 88 
     | 
    
         
            +
              source_code_uri: https://github.com/horizing/llamaparserb
         
     | 
| 
      
 89 
     | 
    
         
            +
              changelog_uri: https://github.com/horizing/llamaparserb/blob/master/CHANGELOG.md
         
     | 
| 
       89 
90 
     | 
    
         
             
            post_install_message:
         
     | 
| 
       90 
91 
     | 
    
         
             
            rdoc_options: []
         
     | 
| 
       91 
92 
     | 
    
         
             
            require_paths:
         
     |