datalab 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 9ae1f504f63608091a53657a07e315c7578fb6a1d31f260a083db626feabdf20
4
+ data.tar.gz: d41b72dcba3ce505311318220acf789b6329f56069ba27783c18c6936b76cbe1
5
+ SHA512:
6
+ metadata.gz: '08a22067023a2ba935cad056132a9a190a5c5f16068ae77d51f59f68dd42f428eb5306d974599a6d0553a9a7a71ce733aa88e210af2a48af4699a68a223cdc4a'
7
+ data.tar.gz: f49525b822f5739ef7613d56ab0e87322e903f3debacd22ca43cbb73c8bd7989cab3bf5c80145e613ce74428ab2d9afd6b6fcc9da5665d6a269fc964d8df7077
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Endless International
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/datalab.gemspec ADDED
@@ -0,0 +1,35 @@
1
# Gem specification for the 'datalab' gem. The specification object is the value of the
# final expression, which is what RubyGems expects when it loads a gemspec.
Gem::Specification.new do | spec |

  spec.name = 'datalab'
  spec.version = '0.1.0'
  spec.authors = [ 'Kristoph Cichocki-Romanov' ]
  spec.email = [ 'rubygems.org@kristoph.net' ]

  spec.summary =
    "The Datalab gem implements a lightweight interface to the Datalab API which provides " \
    "document to Markdown conversion as well as sophisticated OCR for documents and images."
  spec.description =
    "The Datalab gem implements a lightweight interface to the Datalab API. The Datalab API " \
    "can convert a number of document formats, including PDF, Word and PowerPoint, to Markdown. " \
    "In addition it offers sophisticated OCR, layout and line detection for documents and images."
  spec.license = 'MIT'
  spec.homepage = 'https://github.com/EndlessInternational/datalab'
  spec.metadata = {
    'source_code_uri' => 'https://github.com/EndlessInternational/datalab',
    'bug_tracker_uri' => 'https://github.com/EndlessInternational/datalab/issues',
    # 'documentation_uri' => 'https://github.com/EndlessInternational/datalab'
  }

  spec.required_ruby_version = '>= 3.0'
  # only library sources, the license, the readme and this file are packaged
  spec.files = Dir[ "lib/**/*.rb", "LICENSE", "README.md", "datalab.gemspec" ]
  spec.require_paths = [ "lib" ]

  spec.add_runtime_dependency 'faraday', '~> 2.7'
  spec.add_runtime_dependency 'faraday-multipart', '~> 1.0'
  spec.add_runtime_dependency 'dynamicschema', '~> 1.0.0.beta04'

  spec.add_development_dependency 'rspec', '~> 3.13'
  spec.add_development_dependency 'debug', '~> 1.9'
  spec.add_development_dependency 'vcr', '~> 6.3'

end
@@ -0,0 +1,55 @@
1
module Datalab

  ##
  # The +ErrorResult+ class describes an unsuccessful call to the Datalab API. It maps the HTTP
  # status code of the response to a symbolic +error_type+ and a human readable
  # +error_description+.
  #
  # When the response attributes include a +:detail+ array whose first entry is a hash with a
  # +:msg+ key, that message (followed by the +:loc+ path, if any) replaces the default
  # description.
  #
  class ErrorResult

    attr_reader :error_type, :error_description

    ##
    # +status_code+ is the HTTP status of the response and +attributes+ is the (optional)
    # parsed response body with symbol keys. Anything other than a hash is ignored.
    #
    def initialize( status_code, attributes = nil )
      @error_type, @error_description = status_code_to_error( status_code )

      detail = first_detail( attributes )
      message = detail && detail[ :msg ]
      return unless message

      @error_description = message.to_s
      location = detail[ :loc ]
      # the location may be a single value or a (nested) list of path segments
      @error_description += " : " + [ location ].flatten.join( '/' ) if location
    end

    private

    # Returns the first entry of +attributes[ :detail ]+ when it is a hash, otherwise +nil+.
    def first_detail( attributes )
      return nil unless attributes.is_a?( Hash )

      details = attributes[ :detail ]
      detail = details.is_a?( Array ) ? details.first : nil
      detail.is_a?( Hash ) ? detail : nil
    end

    # Maps an HTTP status code to an +[ error_type, error_description ]+ pair.
    def status_code_to_error( status_code )
      case status_code
      # this is here because I've noted invalid payloads being returned with a 200
      when 200
        [ :unexpected_error,
          "The response was successful but it did not include a valid payload." ]
      when 400
        [ :invalid_request_error,
          "There was an issue with the format or content of your request." ]
      when 401
        [ :authentication_error,
          "There's an issue with your API key." ]
      when 402
        [ :payment_required,
          "The request requires a paid account" ]
      when 404
        [ :not_found_error,
          "The requested resource was not found." ]
      when 422
        [ :invalid_data_error,
          "The request body is invalid." ]
      when 429
        [ :rate_limit_error,
          "Your account has hit a rate limit." ]
      when 500..505
        [ :api_error,
          "An unexpected Datalab server error has occurred." ]
      when 529
        [ :overloaded_error,
          "The Datalab service is overloaded." ]
      else
        [ :unknown_error,
          "The Datalab service returned an unexpected status code: '#{status_code}'." ]
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
module Datalab

  ##
  # The +MarkerOptions+ class holds the options of a marker (document conversion) request.
  # The accepted options are declared in the +schema+ block below; +to_h+ returns them as a
  # hash.
  #
  class MarkerOptions
    include DynamicSchema::Definable

    schema do
      maximum_pages Integer, as: :max_pages, in: (1..)
      languages String, array: true
      force_ocr [ TrueClass, FalseClass ]
      paginate [ TrueClass, FalseClass ]
      extract_images [ TrueClass, FalseClass ]
    end

    class << self

      # Builds a new instance from the given options hash and/or block using +builder.build+.
      def build( options = nil, &block )
        new( api_options: builder.build( options, &block ) )
      end

      # Same as +build+ but uses +builder.build!+.
      # NOTE(review): presumably the bang variant validates and raises - confirm against
      # the DynamicSchema documentation.
      def build!( options = nil, &block )
        new( api_options: builder.build!( options, &block ) )
      end

    end

    # The +options+ are run through the schema builder; when +api_options+ are given the
    # built options are merged over them.
    def initialize( options = {}, api_options: nil )
      built_options = self.class.builder.build( options || {} )
      @options = api_options ? api_options.merge( built_options ) : built_options
      # NOTE(review): the schema above declares no :formats option and +string_camelize+ is
      # not defined in this class - confirm whether this line is still needed.
      @options[ :formats ]&.map! { | format | string_camelize( format.to_s ) }
    end

    # Returns the options as a hash.
    def to_h
      @options.to_h
    end

  end
end
33
+
34
+
@@ -0,0 +1,99 @@
1
module Datalab

  ##
  # The +MarkerRequest+ class encapsulates a document conversion request in the Datalab API.
  # After instantiating a new +MarkerRequest+ instance you can begin a markdown conversion
  # by calling the +submit+ method and then subsequently retrieve the results by calling the
  # +retrieve+ method until the conversion is complete.
  #
  # === examples
  #
  #   require 'datalab'
  #
  #   request = Datalab::MarkerRequest.new( api_key: ENV[ 'DATALAB_API_KEY' ] )
  #
  #   file = Faraday::UploadIO.new( ARGV[ 0 ], 'application/pdf' )
  #   response = request.submit( file )
  #   while response.success? && ( result = response.result ).success? && !result.complete?
  #     sleep( 1 )
  #     response = request.retrieve( result )
  #   end
  #
  #   if response.success?
  #     if response.result.success?
  #       puts response.result.markdown
  #     else
  #       puts response.result.failure_message
  #     end
  #   else
  #     puts response.result.error_description
  #   end
  #
  class MarkerRequest < Request

    ##
    # The +submit+ method makes a Datalab '/marker' POST request which will begin conversion of
    # the given file to markdown.
    #
    # The +options+ may be a +MarkerOptions+ instance or anything responding to +to_h+.
    #
    # The response is always an instance of +Faraday::Response+. If +response.success?+ is true,
    # then +response.result+ will be an instance of +MarkerResult+. If the request is not
    # successful then +response.result+ will be an instance of +ErrorResult+.
    #
    # Remember that you should call +response.success?+ to validate that the call to the API was
    # successful and then +response.result.success?+ to validate that the API processed the
    # request successfully.
    #
    def submit( file, options = nil, &block )
      parameters = {}
      if options
        options = MarkerOptions.build( options.to_h ) unless options.is_a?( MarkerOptions )
        parameters = options.to_h
      end
      # merge instead of assigning so that the hash handed back by +to_h+ is never mutated
      parameters = parameters.merge( file: file )

      response = post( "#{BASE_URI}/marker", parameters, &block )
      attributes = parse_attributes( response )
      result =
        if response.success?
          MarkerResult.new(
            attributes || { success: false, status: :failed, error: 'An unknown error occurred.' }
          )
        else
          ErrorResult.new( response.status, attributes || {} )
        end

      ResponseMethods.install( response, result )
    end

    ##
    # The +retrieve+ method takes the successful result of the submit method and makes a Datalab
    # '/marker/{id}' GET request which will return the conversion progress result or, if
    # conversion has been completed, the conversion results.
    #
    # The response is always an instance of +Faraday::Response+. If +response.success?+ is +true+,
    # then +response.result+ will be an instance of +Datalab::MarkerResult+. If the request is
    # not successful then +response.result+ will be an instance of +Datalab::ErrorResult+.
    #
    # Remember that you should call +response.success?+ to validate that the call to the API was
    # successful and then +response.result.success?+ to validate that the API processed the
    # request successfully.
    #
    # Raises +ArgumentError+ if +submit_result+ is not a +MarkerResult+.
    #
    def retrieve( submit_result, &block )
      raise ArgumentError, "The first argument must be an instance of MarkerResult." \
        unless submit_result.is_a?( MarkerResult )

      response = get( "#{BASE_URI}/marker/#{submit_result.id}", &block )
      attributes = parse_attributes( response )
      result =
        if response.success?
          submit_result.merge( attributes || { success: false, status: :failed } )
        else
          ErrorResult.new( response.status, attributes || {} )
        end

      ResponseMethods.install( response, result )
    end

    private

    # Parses the response body as JSON with symbol keys. Returns +nil+ when the body is
    # missing, is not valid JSON or does not decode to a hash.
    def parse_attributes( response )
      attributes = JSON.parse( response.body, symbolize_names: true )
      attributes.is_a?( Hash ) ? attributes : nil
    rescue JSON::ParserError, TypeError
      nil
    end

  end
end
@@ -0,0 +1,75 @@
1
module Datalab

  ##
  # The +MarkerResult+ class wraps the attributes returned by the Datalab API for a document
  # conversion request, both when the conversion is submitted and when it is later retrieved.
  #
  class MarkerResult

    ##
    # +attributes+ is the parsed response body with symbol keys; +nil+ is treated as an empty
    # hash. The result is considered successful when the +:success+ attribute is truthy or the
    # +:status+ attribute is +processing+ or +complete+.
    #
    def initialize( attributes )
      @attributes = attributes || {}
      @success =
        @attributes[ :success ] ||
        [ :processing, :complete ].include?( @attributes[ :status ]&.to_sym )
    end

    # The request id, taken from the +:request_id+ attribute.
    def id
      @attributes[ :request_id ]
    end

    ##
    # The +success?+ method returns +true+ if the conversion was successful.
    #
    # Note that the response +success?+ tells you if the call to the Datalab API was successful
    # while this +success?+ method tells you if the actual conversion operation began
    # successfully.
    #
    def success?
      @success || false
    end

    # The +:status+ attribute as a symbol; +:processing+ when the attribute is absent.
    def status
      @attributes[ :status ]&.to_sym || :processing
    end

    def processing?
      status == :processing
    end

    def complete?
      status == :complete
    end

    ##
    # If +success?+ returns +false+ this method will return a message explaining the reason
    # for the failure.
    #
    def failure_message
      @attributes[ :error ]
    end

    ##
    # The +markdown+ method returns the markdown content extracted from the given document.
    #
    def markdown
      @attributes[ :markdown ]
    end

    ##
    # The +images+ method returns the +:images+ attribute of the result, or +nil+ when the
    # result does not include one.
    #
    def images
      @attributes[ :images ]
    end

    ##
    # The +metadata+ method returns the +:metadata+ attribute with every key converted to a
    # snake case string. The converted hash is computed once and then reused.
    #
    def metadata
      return @metadata if @metadata

      raw_metadata = @attributes[ :metadata ] || {}
      @metadata = raw_metadata.transform_keys do | key |
        key.to_s.gsub( /([a-z])([A-Z])/, '\1_\2' ).downcase
      end
    end

    ##
    # The +merge+ method returns a new +MarkerResult+ whose attributes are the attributes of
    # this result overlaid with the given +attributes+; +nil+ is treated as an empty hash.
    #
    def merge( attributes )
      self.class.new( @attributes.merge( attributes || {} ) )
    end

  end
end
@@ -0,0 +1,18 @@
1
module Datalab

  ##
  # The +ModuleMethods+ module provides the +connection+ and +api_key+ configuration
  # accessors. Each one stores the given value (when one is given) in an instance variable of
  # the object the module was mixed into and returns the current value.
  #
  module ModuleMethods

    # the connection used when no other connection has been configured
    DEFAULT_CONNECTION = Faraday.new do | builder |
      builder.request :multipart
      builder.request :url_encoded
      builder.adapter Faraday.default_adapter
    end

    # Sets the connection when one is given; returns the configured connection, falling back
    # to +DEFAULT_CONNECTION+.
    def connection( connection = nil )
      @connection = connection if connection
      @connection ||= DEFAULT_CONNECTION
    end

    # Sets the api key when one is given; returns the configured api key (or +nil+).
    def api_key( api_key = nil )
      @api_key = api_key if api_key
      @api_key
    end

  end
end
@@ -0,0 +1,30 @@
1
module Datalab

  ##
  # The +OcrOptions+ class holds the options of an OCR request. The accepted options are
  # declared in the +schema+ block below; +to_h+ returns them as a hash.
  #
  class OcrOptions
    include DynamicSchema::Definable

    schema do
      maximum_pages Integer, as: :max_pages, in: (1..)
      languages String, array: true
    end

    class << self

      # Builds a new instance from the given options hash and/or block using +builder.build+.
      def build( options = nil, &block )
        new( api_options: builder.build( options, &block ) )
      end

      # Same as +build+ but uses +builder.build!+.
      # NOTE(review): presumably the bang variant validates and raises - confirm against
      # the DynamicSchema documentation.
      def build!( options = nil, &block )
        new( api_options: builder.build!( options, &block ) )
      end

    end

    # The +options+ are run through the schema builder; when +api_options+ are given the
    # built options are merged over them.
    def initialize( options = {}, api_options: nil )
      built_options = self.class.builder.build( options || {} )
      @options = api_options ? api_options.merge( built_options ) : built_options
    end

    # Returns the options as a hash.
    def to_h
      @options.to_h
    end

  end
end
29
+
30
+
@@ -0,0 +1,23 @@
1
module Datalab

  ##
  # The +OcrPage+ class is a read-only view over the attributes of a single page of an OCR
  # result. The given attributes are copied (shallowly) when the page is created.
  #
  class OcrPage

    def initialize( attributes )
      @attributes = attributes ? attributes.dup : {}
    end

    # The +:text_lines+ attribute with each entry wrapped in an +OcrTextLine+; an empty array
    # when the attribute is absent. New +OcrTextLine+ instances are built on every call.
    def text_lines
      lines = @attributes[ :text_lines ] || []
      lines.map { | line | OcrTextLine.new( line ) }
    end

    # The +:languages+ attribute.
    def languages
      @attributes[ :languages ]
    end

    # The page number, taken from the +:page+ attribute.
    def number
      @attributes[ :page ]
    end

    # The +:image_bbox+ attribute.
    def bounding_rectangle
      @attributes[ :image_bbox ]
    end

  end
end
@@ -0,0 +1,98 @@
1
module Datalab

  ##
  # The +OcrRequest+ class encapsulates a document or image recognition request in the Datalab
  # API. After instantiating a new +OcrRequest+ instance you can begin recognition by calling
  # the +submit+ method and then subsequently retrieve the results by calling the +retrieve+
  # method until recognition is complete.
  #
  # === examples
  #
  #   require 'datalab'
  #
  #   request = Datalab::OcrRequest.new( api_key: ENV[ 'DATALAB_API_KEY' ] )
  #
  #   file = Faraday::UploadIO.new( ARGV[ 0 ], 'image/jpeg' )
  #   response = request.submit( file )
  #   while response.success? && ( result = response.result ).success? && !result.complete?
  #     sleep( 1 )
  #     response = request.retrieve( result )
  #   end
  #
  #   if response.success?
  #     if response.result.success?
  #       response.result.pages.each do | page |
  #         puts page.text_lines.map( &:text ).join( "\n" )
  #       end
  #     else
  #       puts response.result.failure_message
  #     end
  #   else
  #     puts response.result.error_description
  #   end
  #
  class OcrRequest < Request

    ##
    # The +submit+ method makes a Datalab '/ocr' POST request which will begin recognition of the
    # given file.
    #
    # The +options+ may be an +OcrOptions+ instance or anything responding to +to_h+.
    #
    # The response is always an instance of +Faraday::Response+. If +response.success?+ is true,
    # then +response.result+ will be an instance of +OcrResult+. If the request is not successful
    # then +response.result+ will be an instance of +ErrorResult+.
    #
    # Remember that you should call +response.success?+ to validate that the call to the API was
    # successful and then +response.result.success?+ to validate that the API processed the
    # request successfully.
    #
    def submit( file, options = nil, &block )
      parameters = {}
      if options
        options = OcrOptions.build( options.to_h ) unless options.is_a?( OcrOptions )
        parameters = options.to_h
      end
      # merge instead of assigning so that the hash handed back by +to_h+ is never mutated
      parameters = parameters.merge( file: file )

      response = post( "#{BASE_URI}/ocr", parameters, &block )
      attributes = parse_attributes( response )
      result =
        if response.success?
          OcrResult.new(
            attributes || { success: false, status: :failed, error: 'An unknown error occurred.' }
          )
        else
          ErrorResult.new( response.status, attributes || {} )
        end

      ResponseMethods.install( response, result )
    end

    ##
    # The +retrieve+ method takes the successful result of the submit method and makes a Datalab
    # '/ocr/{id}' GET request which will return the recognition progress result or, if recognition
    # has been completed, the recognition results.
    #
    # The response is always an instance of +Faraday::Response+. If +response.success?+ is +true+,
    # then +response.result+ will be an instance of +Datalab::OcrResult+. If the request is not
    # successful then +response.result+ will be an instance of +Datalab::ErrorResult+.
    #
    # Remember that you should call +response.success?+ to validate that the call to the API was
    # successful and then +response.result.success?+ to validate that the API processed the
    # request successfully.
    #
    # Raises +ArgumentError+ if +submit_result+ is not an +OcrResult+.
    #
    def retrieve( submit_result, &block )
      raise ArgumentError, "The first argument must be an instance of OcrResult." \
        unless submit_result.is_a?( OcrResult )

      response = get( "#{BASE_URI}/ocr/#{submit_result.id}", &block )
      attributes = parse_attributes( response )
      result =
        if response.success?
          submit_result.merge( attributes || { success: false, status: :failed } )
        else
          ErrorResult.new( response.status, attributes || {} )
        end

      ResponseMethods.install( response, result )
    end

    private

    # Parses the response body as JSON with symbol keys. Returns +nil+ when the body is
    # missing, is not valid JSON or does not decode to a hash.
    def parse_attributes( response )
      attributes = JSON.parse( response.body, symbolize_names: true )
      attributes.is_a?( Hash ) ? attributes : nil
    rescue JSON::ParserError, TypeError
      nil
    end

  end
end
@@ -0,0 +1,64 @@
1
module Datalab

  ##
  # The +OcrResult+ class wraps the attributes returned by the Datalab API for a recognition
  # request, both when the request is submitted and when it is later retrieved.
  #
  class OcrResult

    ##
    # +attributes+ is the parsed response body with symbol keys; +nil+ is treated as an empty
    # hash. The result is considered successful when the +:success+ attribute is truthy or the
    # +:status+ attribute is +processing+ or +complete+.
    #
    def initialize( attributes )
      @attributes = attributes || {}
      @success =
        @attributes[ :success ] ||
        [ :processing, :complete ].include?( @attributes[ :status ]&.to_sym )
    end

    # The request id, taken from the +:request_id+ attribute.
    def id
      @attributes[ :request_id ]
    end

    ##
    # The +success?+ method returns +true+ if the recognition was successful.
    #
    # Note that the response +success?+ tells you if the call to the Datalab API was successful
    # while this +success?+ method tells you if the actual recognition operation began
    # successfully.
    #
    def success?
      @success || false
    end

    # The +:status+ attribute as a symbol; +:processing+ when the attribute is absent.
    def status
      @attributes[ :status ]&.to_sym || :processing
    end

    def processing?
      status == :processing
    end

    def complete?
      status == :complete
    end

    ##
    # If +success?+ returns +false+ this method will return a message explaining the reason
    # for the failure.
    #
    def failure_message
      @attributes[ :error ]
    end

    ##
    # The +pages+ method returns the pages extracted from the given document. If the given
    # document was an image the result is an array with a single page. If no pages were recognized
    # the result is an empty array.
    #
    def pages
      ( @attributes[ :pages ] || [] ).map { | page | OcrPage.new( page ) }
    end

    # The +:page_count+ attribute.
    def page_count
      @attributes[ :page_count ]
    end

    ##
    # The +merge+ method returns a new +OcrResult+ whose attributes are the attributes of this
    # result overlaid with the given +attributes+; +nil+ is treated as an empty hash.
    #
    def merge( attributes )
      self.class.new( @attributes.merge( attributes || {} ) )
    end

  end
end
@@ -0,0 +1,23 @@
1
module Datalab

  ##
  # The +OcrTextLine+ class is a read-only view over the attributes of a single recognized
  # line of text. The given attributes are copied (shallowly) when the line is created.
  #
  class OcrTextLine

    def initialize( attributes )
      @attributes = attributes&.dup || {}
    end

    # The +:text+ attribute.
    def text
      @attributes[ :text ]
    end

    # The +:confidence+ attribute.
    def confidence
      @attributes[ :confidence ]
    end

    # The +:polygon+ attribute.
    def bounding_polygon
      @attributes[ :polygon ]
    end

    # The +:bbox+ attribute, falling back to +:image_bbox+ when +:bbox+ is absent.
    # NOTE(review): +:image_bbox+ appears to be a page level key (see +OcrPage+) while text
    # lines carry +:bbox+ - confirm against the Datalab OCR response schema. The fallback
    # keeps the previous behavior for payloads that only provide +:image_bbox+.
    def bounding_rectangle
      @attributes[ :bbox ] || @attributes[ :image_bbox ]
    end

  end
end
@@ -0,0 +1,38 @@
1
module Datalab

  ##
  # The +Request+ class is the shared base of the concrete request classes (+MarkerRequest+,
  # +OcrRequest+). It holds the connection and the api key and offers the +post+ and +get+
  # helpers to its subclasses; it is not meant to be used on its own.
  #
  class Request

    BASE_URI = 'https://www.datalab.to/api/v1'

    ##
    # Creates the request with an optional (Faraday) +connection+ and an optional +api_key+.
    # Values which are not given are taken from +Datalab.connection+ and +Datalab.api_key+.
    # Raises +ArgumentError+ when no api key is available from either source.
    #
    def initialize( connection: nil, api_key: nil )
      @connection = connection || Datalab.connection
      @api_key = api_key || Datalab.api_key
      return if @api_key

      raise ArgumentError, "An 'api_key' is required unless configured using 'Datalab.api_key'."
    end

    protected

    # Posts +body+ to +uri+, sending the api key in the 'X-Api-Key' header. The block, if
    # any, is handed to the connection.
    def post( uri, body, &block )
      @connection.post( uri, body, 'X-Api-Key' => @api_key, &block )
    end

    # Gets +uri+, setting the 'X-Api-Key' header on the request before yielding the request
    # to the block, if any.
    def get( uri, &block )
      @connection.get( uri ) do | request |
        request.headers[ 'X-Api-Key' ] = @api_key
        block&.call( request )
      end
    end

  end

end
@@ -0,0 +1,15 @@
1
module Datalab

  ##
  # The +ResponseMethods+ module extends a response object (a Faraday response in this
  # library) with a +result+ method.
  #
  module ResponseMethods

    class << self

      # Stores +result+ on +response+, extends +response+ with this module and returns
      # +response+.
      def install( response, result )
        response.instance_variable_set( :@_datalab_result, result )
        response.extend( ResponseMethods )
      end

    end

    # The result stored by +ResponseMethods.install+.
    def result
      @_datalab_result
    end

  end
end
data/lib/datalab.rb ADDED
@@ -0,0 +1,29 @@
1
+ require 'json'
2
+ require 'base64'
3
+ require 'uri'
4
+
5
+ require 'faraday'
6
+ require 'faraday/multipart'
7
+ require 'dynamic_schema'
8
+
9
+ require_relative 'datalab/error_result'
10
+ require_relative 'datalab/request'
11
+ require_relative 'datalab/response_methods'
12
+
13
+ require_relative 'datalab/marker_options'
14
+ require_relative 'datalab/marker_result'
15
+ require_relative 'datalab/marker_request'
16
+
17
+ require_relative 'datalab/ocr_text_line'
18
+ require_relative 'datalab/ocr_page'
19
+ require_relative 'datalab/ocr_options'
20
+ require_relative 'datalab/ocr_result'
21
+ require_relative 'datalab/ocr_request'
22
+
23
+ require_relative 'datalab/module_methods'
24
+
25
# Make the +ModuleMethods+ helpers (+connection+, +api_key+) callable on the +Datalab+
# module itself.
module Datalab
  extend Datalab::ModuleMethods
end
28
+
29
+
metadata ADDED
@@ -0,0 +1,149 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: datalab
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Kristoph Cichocki-Romanov
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2024-11-14 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: faraday
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.7'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: faraday-multipart
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: dynamicschema
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 1.0.0.beta04
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 1.0.0.beta04
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '3.13'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '3.13'
69
+ - !ruby/object:Gem::Dependency
70
+ name: debug
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1.9'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1.9'
83
+ - !ruby/object:Gem::Dependency
84
+ name: vcr
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '6.3'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '6.3'
97
+ description: The Datalab gem implements a lightweight interface to the Datalab API.
98
+ The Datalab API can convert a number of document formats, including PDF, Word and
99
+ Powerpoint to Markdown. In addition in offers sophisticate OCR, layout and line
100
+ detection for documents an images.
101
+ email:
102
+ - rubygems.org@kristoph.net
103
+ executables: []
104
+ extensions: []
105
+ extra_rdoc_files: []
106
+ files:
107
+ - LICENSE
108
+ - datalab.gemspec
109
+ - lib/datalab.rb
110
+ - lib/datalab/error_result.rb
111
+ - lib/datalab/marker_options.rb
112
+ - lib/datalab/marker_request.rb
113
+ - lib/datalab/marker_result.rb
114
+ - lib/datalab/module_methods.rb
115
+ - lib/datalab/ocr_options.rb
116
+ - lib/datalab/ocr_page.rb
117
+ - lib/datalab/ocr_request.rb
118
+ - lib/datalab/ocr_result.rb
119
+ - lib/datalab/ocr_text_line.rb
120
+ - lib/datalab/request.rb
121
+ - lib/datalab/response_methods.rb
122
+ homepage: https://github.com/EndlessInternational/datalab
123
+ licenses:
124
+ - MIT
125
+ metadata:
126
+ source_code_uri: https://github.com/EndlessInternational/datalab
127
+ bug_tracker_uri: https://github.com/EndlessInternational/datalab/issues
128
+ post_install_message:
129
+ rdoc_options: []
130
+ require_paths:
131
+ - lib
132
+ required_ruby_version: !ruby/object:Gem::Requirement
133
+ requirements:
134
+ - - ">="
135
+ - !ruby/object:Gem::Version
136
+ version: '3.0'
137
+ required_rubygems_version: !ruby/object:Gem::Requirement
138
+ requirements:
139
+ - - ">="
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
142
+ requirements: []
143
+ rubygems_version: 3.5.19
144
+ signing_key:
145
+ specification_version: 4
146
+ summary: The Datalab gem implements a lightweight interface to the Datalab API which
147
+ provides document to Markdown conversion as well as sophisticated OCR for documents
148
+ and images.
149
+ test_files: []