datalab 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 9ae1f504f63608091a53657a07e315c7578fb6a1d31f260a083db626feabdf20
4
+ data.tar.gz: d41b72dcba3ce505311318220acf789b6329f56069ba27783c18c6936b76cbe1
5
+ SHA512:
6
+ metadata.gz: '08a22067023a2ba935cad056132a9a190a5c5f16068ae77d51f59f68dd42f428eb5306d974599a6d0553a9a7a71ce733aa88e210af2a48af4699a68a223cdc4a'
7
+ data.tar.gz: f49525b822f5739ef7613d56ab0e87322e903f3debacd22ca43cbb73c8bd7989cab3bf5c80145e613ce74428ab2d9afd6b6fcc9da5665d6a269fc964d8df7077
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Endless International
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/datalab.gemspec ADDED
@@ -0,0 +1,35 @@
# Gem specification for the 'datalab' gem: identity, packaged files and dependencies.
Gem::Specification.new do | spec |

  spec.name           = 'datalab'
  spec.version        = '0.1.0'
  spec.authors        = [ 'Kristoph Cichocki-Romanov' ]
  spec.email          = [ 'rubygems.org@kristoph.net' ]

  spec.summary        =
    "The Datalab gem implements a lightweight interface to the Datalab API which provides " \
    "document to Markdown conversion as well as sophisticated OCR for documents and images."
  spec.description    =
    "The Datalab gem implements a lightweight interface to the Datalab API. The Datalab API " \
    "can convert a number of document formats, including PDF, Word and Powerpoint to Markdown. " \
    "In addition it offers sophisticated OCR, layout and line detection for documents and images."
  spec.license        = 'MIT'
  spec.homepage       = 'https://github.com/EndlessInternational/datalab'
  spec.metadata       = {
    'source_code_uri' => 'https://github.com/EndlessInternational/datalab',
    'bug_tracker_uri' => 'https://github.com/EndlessInternational/datalab/issues',
    # 'documentation_uri' => 'https://github.com/EndlessInternational/datalab'
  }

  spec.required_ruby_version = '>= 3.0'
  # only the library sources, the license, the readme and this gemspec are packaged
  spec.files          = Dir[ "lib/**/*.rb", "LICENSE", "README.md", "datalab.gemspec" ]
  spec.require_paths  = [ "lib" ]

  # runtime dependencies
  spec.add_dependency 'faraday', '~> 2.7'
  spec.add_dependency 'faraday-multipart', '~> 1.0'
  spec.add_dependency 'dynamicschema', '~> 1.0.0.beta04'

  # development and test dependencies
  spec.add_development_dependency 'rspec', '~> 3.13'
  spec.add_development_dependency 'debug', '~> 1.9'
  spec.add_development_dependency 'vcr', '~> 6.3'

end
@@ -0,0 +1,55 @@
module Datalab

  ##
  # The +ErrorResult+ class describes a failed call to the Datalab API. An instance is built
  # from the HTTP status code of the response and, when available, the parsed response body.
  #
  # +error_type+ is a symbol derived from the status code ( for example +:rate_limit_error+ )
  # and +error_description+ is a human readable message. When the response body carries a
  # +detail+ entry with a message, that message replaces the generic description.
  #
  class ErrorResult

    attr_reader :error_type, :error_description

    ##
    # +status_code+ is the HTTP status of the response. +attributes+ is the response body parsed
    # with symbolized keys, or +nil+ ( or any non-hash value ) when no usable body was returned.
    #
    def initialize( status_code, attributes = nil )
      @error_type, @error_description = status_code_to_error( status_code )

      detail = attributes.is_a?( Hash ) ? attributes[ :detail ] : nil
      case detail
      when String
        # a plain string detail is used as the description as-is
        @error_description = detail unless detail.strip.empty?
      when Array
        # a list of detail entries; only the first entry is reported
        describe_detail_entry( detail.first )
      end
    end

  private

    # Replaces the description with the +:msg+ of the given detail entry, followed by its
    # +:loc+ ( joined with '/' ) when one is present. Entries without a +:msg+ are ignored.
    def describe_detail_entry( entry )
      return unless entry.is_a?( Hash )

      message = entry[ :msg ]
      return unless message

      location = entry[ :loc ]
      @error_description =
        location ? "#{message} : #{[ location ].flatten.join( '/' )}" : message
    end

    # Maps an HTTP status code to an +[ error_type, error_description ]+ pair.
    def status_code_to_error( status_code )
      case status_code
      # this is here because invalid payloads have been observed being returned with a 200
      when 200
        [ :unexpected_error,
          "The response was successful but it did not include a valid payload." ]
      when 400
        [ :invalid_request_error,
          "There was an issue with the format or content of your request." ]
      when 401
        [ :authentication_error,
          "There's an issue with your API key." ]
      when 402
        [ :payment_required,
          "The request requires a paid account" ]
      when 404
        [ :not_found_error,
          "The requested resource was not found." ]
      when 422
        [ :invalid_data_error,
          "The request body is invalid." ]
      when 429
        [ :rate_limit_error,
          "Your account has hit a rate limit." ]
      when 500..505
        [ :api_error,
          "An unexpected Datalab server error has occurred." ]
      when 529
        [ :overloaded_error,
          "The Datalab service is overloaded." ]
      else
        [ :unknown_error,
          "The Datalab service returned an unexpected status code: '#{status_code}'." ]
      end
    end

  end
end
@@ -0,0 +1,34 @@
module Datalab

  ##
  # The +MarkerOptions+ class holds the options that accompany a +MarkerRequest+ submission.
  # The accepted options are declared in the +schema+ block below and are built and validated
  # through the builder provided by +DynamicSchema::Definable+.
  #
  class MarkerOptions
    include DynamicSchema::Definable

    schema do
      maximum_pages     Integer, as: :max_pages, in: (1..)
      languages         String, array: true
      force_ocr         [ TrueClass, FalseClass ]
      paginate          [ TrueClass, FalseClass ]
      extract_images    [ TrueClass, FalseClass ]
    end

    # Builds a +MarkerOptions+ instance from an options hash and/or a builder block.
    def self.build( options = nil, &block )
      new( api_options: builder.build( options, &block ) )
    end

    # Same as +build+ but delegates to the builder's +build!+ variant.
    def self.build!( options = nil, &block )
      new( api_options: builder.build!( options, &block ) )
    end

    ##
    # +options+ are passed through the schema builder; +api_options+, when given, are options
    # that were already built and serve as the base that +options+ is merged over.
    #
    def initialize( options = {}, api_options: nil )
      @options = self.class.builder.build( options || {} )
      @options = api_options.merge( @options ) if api_options
      # NOTE: a former post-processing step camelized a +:formats+ option through a
      # +string_camelize+ helper; the schema declares no such option and no such helper is
      # defined in this class, so the step has been removed.
    end

    # Returns the built options as a hash.
    def to_h
      @options.to_h
    end

  end
end
@@ -0,0 +1,99 @@
module Datalab

  ##
  # The +MarkerRequest+ class encapsulates a document conversion request in the Datalab API.
  # After instantiating a new +MarkerRequest+ instance you can begin a markdown conversion
  # by calling the +submit+ method and then subsequently retrieve the results by calling the
  # +retrieve+ method until the conversion is complete.
  #
  # === examples
  #
  #   require 'datalab'
  #
  #   request = Datalab::MarkerRequest.new( api_key: ENV[ 'DATALAB_API_KEY' ] )
  #
  #   file = Faraday::UploadIO.new( ARGV[ 0 ], 'application/pdf' )
  #   response = request.submit( file )
  #   while response.success? && ( result = response.result ).success?
  #     break if result.complete?
  #     sleep( 1 )
  #     response = request.retrieve( result )
  #   end
  #
  #   if response.success?
  #     if response.result.success?
  #       puts response.result.markdown
  #     else
  #       puts response.result.failure_message
  #     end
  #   else
  #     puts response.result.error_description
  #   end
  #
  class MarkerRequest < Request

    ##
    # The +submit+ method makes a Datalab '/marker' POST request which will begin conversion of
    # the given file to markdown.
    #
    # +options+ may be +nil+, a +MarkerOptions+ instance or anything responding to +to_h+. The
    # given options are never modified.
    #
    # The response is always an instance of +Faraday::Response+. If +response.success?+ is true,
    # then +response.result+ will be an instance of +MarkerResult+. If the request is not
    # successful then +response.result+ will be an instance of +ErrorResult+.
    #
    # Remember that you should call +response.success?+ to validate that the call to the API was
    # successful and then +response.result.success?+ to validate that the API processed the
    # request successfully.
    #
    def submit( file, options = nil, &block )
      # merge into a new hash so the hash owned by the caller's options is left untouched
      parameters = submit_parameters( options ).merge( file: file )
      response = post( "#{BASE_URI}/marker", parameters, &block )
      attributes = parse_attributes( response.body )

      result =
        if response.success?
          MarkerResult.new(
            attributes || { success: false, status: :failed, error: 'An unknown error occured.' }
          )
        else
          ErrorResult.new( response.status, attributes || {} )
        end

      ResponseMethods.install( response, result )
    end

    ##
    # The +retrieve+ method takes the successful result of the submit method and makes a Datalab
    # '/marker/{id}' GET request which will return the conversion progress result or, if
    # conversion has been completed, the conversion results.
    #
    # The response is always an instance of +Faraday::Response+. If +response.success?+ is +true+,
    # then +response.result+ will be an instance of +Datalab::MarkerResult+. If the request is not
    # successful then +response.result+ will be an instance of +Datalab::ErrorResult+.
    #
    # Remember that you should call +response.success?+ to validate that the call to the API was
    # successful and then +response.result.success?+ to validate that the API processed the
    # request successfully.
    #
    # Raises +ArgumentError+ if +submit_result+ is not a +MarkerResult+.
    #
    def retrieve( submit_result, &block )
      raise ArgumentError, "The first argument must be an instance of MarkerResult." \
        unless submit_result.is_a?( MarkerResult )

      response = get( "#{BASE_URI}/marker/#{submit_result.id}", &block )
      attributes = parse_attributes( response.body )

      result =
        if response.success?
          submit_result.merge( attributes || { success: false, status: :failed } )
        else
          ErrorResult.new( response.status, attributes || {} )
        end

      ResponseMethods.install( response, result )
    end

  private

    # Normalizes the +options+ argument of +submit+ into a plain hash of request parameters.
    def submit_parameters( options )
      return {} unless options

      options = MarkerOptions.build( options.to_h ) unless options.is_a?( MarkerOptions )
      options.to_h
    end

    # Parses a JSON response body into a hash with symbolized keys. Returns +nil+ when the
    # body cannot be parsed or when the payload is not a JSON object.
    def parse_attributes( body )
      attributes = JSON.parse( body, symbolize_names: true )
      attributes.is_a?( Hash ) ? attributes : nil
    rescue StandardError
      nil
    end

  end
end
@@ -0,0 +1,75 @@
module Datalab

  ##
  # The +MarkerResult+ class wraps the attributes returned by the Datalab '/marker' endpoints:
  # both the initial submission response and the subsequent progress / completion responses.
  #
  class MarkerResult

    ##
    # +attributes+ is the parsed response body with symbolized keys; +nil+ is treated as an
    # empty hash. The result is considered successful when the +:success+ attribute is truthy
    # or when the +:status+ attribute is +processing+ or +complete+.
    #
    def initialize( attributes )
      @attributes = attributes || {}
      @success =
        @attributes[ :success ] ||
        [ :processing, :complete ].include?( @attributes[ :status ]&.to_sym )
    end

    # The request identifier used to retrieve the conversion results.
    def id
      @attributes[ :request_id ]
    end

    ##
    # The +success?+ method returns +true+ if the conversion was successful.
    #
    # Note that the response +success?+ tells you if the call to the Datalab API was successful
    # while this +success?+ method tells you if the actual conversion operation began
    # successfully.
    #
    def success?
      @success || false
    end

    # The status reported by the API as a symbol; +:processing+ when no status was reported.
    def status
      @attributes[ :status ]&.to_sym || :processing
    end

    def processing?
      status == :processing
    end

    def complete?
      status == :complete
    end

    ##
    # If +success?+ returns +false+ this method will return a message explaining the reason
    # for the failure.
    #
    def failure_message
      @attributes[ :error ]
    end

    ##
    # The +markdown+ method returns the markdown content extracted from the given document.
    #
    def markdown
      @attributes[ :markdown ]
    end

    ##
    # The +images+ method returns the +images+ attribute of the response, or +nil+ when the
    # response did not include one.
    #
    def images
      @attributes[ :images ]
    end

    ##
    # The +metadata+ method returns the response metadata with every key converted to a
    # snake case string ( e.g. +pageStats+ becomes +'page_stats'+ ). The converted hash is
    # computed once and cached.
    #
    def metadata
      @metadata ||= ( @attributes[ :metadata ] || {} ).transform_keys do | key |
        key.to_s.gsub( /([a-z])([A-Z])/, '\1_\2' ).downcase
      end
    end

    # Returns a new +MarkerResult+ combining these attributes with the given +attributes+;
    # the given attributes take precedence and +nil+ is treated as an empty hash.
    def merge( attributes )
      self.class.new( @attributes.merge( attributes || {} ) )
    end

  end
end
@@ -0,0 +1,18 @@
module Datalab

  ##
  # The +ModuleMethods+ module provides the configuration accessors ( +connection+ and
  # +api_key+ ) for whatever object extends it.
  #
  module ModuleMethods

    # the Faraday connection used when no other connection has been configured
    DEFAULT_CONNECTION = Faraday.new do | faraday |
      faraday.request :multipart
      faraday.request :url_encoded
      faraday.adapter Faraday.default_adapter
    end

    # Stores the given connection when one is passed; returns the stored connection, falling
    # back to +DEFAULT_CONNECTION+ when none has been stored.
    def connection( connection = nil )
      @connection = connection || @connection || DEFAULT_CONNECTION
    end

    # Stores the given api key when one is passed; returns the stored api key ( or +nil+ ).
    def api_key( api_key = nil )
      @api_key = api_key || @api_key
    end

  end
end
@@ -0,0 +1,30 @@
module Datalab

  ##
  # The +OcrOptions+ class holds the options that accompany an +OcrRequest+ submission. The
  # accepted options are declared in the +schema+ block below and are built through the
  # builder provided by +DynamicSchema::Definable+.
  #
  class OcrOptions
    include DynamicSchema::Definable

    schema do
      maximum_pages     Integer, as: :max_pages, in: (1..)
      languages         String, array: true
    end

    class << self

      # Builds an +OcrOptions+ instance from an options hash and/or a builder block.
      def build( options = nil, &block )
        built = builder.build( options, &block )
        new( api_options: built )
      end

      # Same as +build+ but delegates to the builder's +build!+ variant.
      def build!( options = nil, &block )
        built = builder.build!( options, &block )
        new( api_options: built )
      end

    end

    ##
    # +options+ are passed through the schema builder; +api_options+, when given, are options
    # that were already built and serve as the base that +options+ is merged over.
    #
    def initialize( options = {}, api_options: nil )
      built = self.class.builder.build( options || {} )
      @options = api_options ? api_options.merge( built ) : built
    end

    # Returns the built options as a hash.
    def to_h
      @options.to_h
    end

  end
end
@@ -0,0 +1,23 @@
module Datalab

  ##
  # The +OcrPage+ class wraps the attributes of a single page of an OCR result.
  #
  class OcrPage

    # +attributes+ is the page hash with symbolized keys; it is copied, and +nil+ is treated
    # as an empty hash.
    def initialize( attributes )
      @attributes = attributes&.dup || {}
    end

    # The text lines of the page, each wrapped in an +OcrTextLine+; empty when there are none.
    def text_lines
      lines = @attributes[ :text_lines ] || []
      lines.map { | line | OcrTextLine.new( line ) }
    end

    # The +languages+ attribute of the page.
    def languages
      @attributes[ :languages ]
    end

    # The +page+ attribute of the page.
    def number
      @attributes[ :page ]
    end

    # The +image_bbox+ attribute of the page.
    def bounding_rectangle
      @attributes[ :image_bbox ]
    end

  end
end
@@ -0,0 +1,98 @@
module Datalab

  ##
  # The +OcrRequest+ class encapsulates a document or image recognition request in the Datalab
  # API. After instantiating a new +OcrRequest+ instance you can begin recognition by calling
  # the +submit+ method and then subsequently retrieve the results by calling the +retrieve+
  # method until recognition is complete.
  #
  # === examples
  #
  #   require 'datalab'
  #
  #   request = Datalab::OcrRequest.new( api_key: ENV[ 'DATALAB_API_KEY' ] )
  #
  #   file = Faraday::UploadIO.new( ARGV[ 0 ], 'image/jpeg' )
  #   response = request.submit( file )
  #   while response.success? && ( result = response.result ).success?
  #     break if result.complete?
  #     sleep( 1 )
  #     response = request.retrieve( result )
  #   end
  #
  #   if response.success?
  #     if response.result.success?
  #       response.result.pages.each do | page |
  #         page.text_lines.each { | line | puts line.text }
  #       end
  #     else
  #       puts response.result.failure_message
  #     end
  #   else
  #     puts response.result.error_description
  #   end
  #
  class OcrRequest < Request

    ##
    # The +submit+ method makes a Datalab '/ocr' POST request which will begin recognition of the
    # given file.
    #
    # +options+ may be +nil+, an +OcrOptions+ instance or anything responding to +to_h+. The
    # given options are never modified.
    #
    # The response is always an instance of +Faraday::Response+. If +response.success?+ is true,
    # then +response.result+ will be an instance of +OcrResult+. If the request is not successful
    # then +response.result+ will be an instance of +ErrorResult+.
    #
    # Remember that you should call +response.success?+ to validate that the call to the API was
    # successful and then +response.result.success?+ to validate that the API processed the
    # request successfully.
    #
    def submit( file, options = nil, &block )
      # merge into a new hash so the hash owned by the caller's options is left untouched
      parameters = submit_parameters( options ).merge( file: file )
      response = post( "#{BASE_URI}/ocr", parameters, &block )
      attributes = parse_attributes( response.body )

      result =
        if response.success?
          OcrResult.new(
            attributes || { success: false, status: :failed, error: 'An unknown error occured.' }
          )
        else
          ErrorResult.new( response.status, attributes || {} )
        end

      ResponseMethods.install( response, result )
    end

    ##
    # The +retrieve+ method takes the successful result of the submit method and makes a Datalab
    # '/ocr/{id}' GET request which will return the recognition progress result or, if recognition
    # has been completed, the recognition results.
    #
    # The response is always an instance of +Faraday::Response+. If +response.success?+ is +true+,
    # then +response.result+ will be an instance of +Datalab::OcrResult+. If the request is not
    # successful then +response.result+ will be an instance of +Datalab::ErrorResult+.
    #
    # Remember that you should call +response.success?+ to validate that the call to the API was
    # successful and then +response.result.success?+ to validate that the API processed the
    # request successfully.
    #
    # Raises +ArgumentError+ if +submit_result+ is not an +OcrResult+.
    #
    def retrieve( submit_result, &block )
      raise ArgumentError, "The first argument must be an instance of OcrResult." \
        unless submit_result.is_a?( OcrResult )

      response = get( "#{BASE_URI}/ocr/#{submit_result.id}", &block )
      attributes = parse_attributes( response.body )

      result =
        if response.success?
          submit_result.merge( attributes || { success: false, status: :failed } )
        else
          ErrorResult.new( response.status, attributes || {} )
        end

      ResponseMethods.install( response, result )
    end

  private

    # Normalizes the +options+ argument of +submit+ into a plain hash of request parameters.
    def submit_parameters( options )
      return {} unless options

      options = OcrOptions.build( options.to_h ) unless options.is_a?( OcrOptions )
      options.to_h
    end

    # Parses a JSON response body into a hash with symbolized keys. Returns +nil+ when the
    # body cannot be parsed or when the payload is not a JSON object.
    def parse_attributes( body )
      attributes = JSON.parse( body, symbolize_names: true )
      attributes.is_a?( Hash ) ? attributes : nil
    rescue StandardError
      nil
    end

  end
end
@@ -0,0 +1,64 @@
module Datalab

  ##
  # The +OcrResult+ class wraps the attributes returned by the Datalab '/ocr' endpoints: both
  # the initial submission response and the subsequent progress / completion responses.
  #
  class OcrResult

    ##
    # +attributes+ is the parsed response body with symbolized keys; +nil+ is treated as an
    # empty hash. The result is considered successful when the +:success+ attribute is truthy
    # or when the +:status+ attribute is +processing+ or +complete+.
    #
    def initialize( attributes )
      @attributes = attributes || {}
      @success =
        @attributes[ :success ] ||
        [ :processing, :complete ].include?( @attributes[ :status ]&.to_sym )
    end

    # The request identifier used to retrieve the recognition results.
    def id
      @attributes[ :request_id ]
    end

    ##
    # The +success?+ method returns +true+ if the recognition was successful.
    #
    # Note that the response +success?+ tells you if the call to the Datalab API was successful
    # while this +success?+ method tells you if the actual recognition operation began
    # successfully.
    #
    def success?
      @success || false
    end

    # The status reported by the API as a symbol; +:processing+ when no status was reported.
    def status
      @attributes[ :status ]&.to_sym || :processing
    end

    def processing?
      status == :processing
    end

    def complete?
      status == :complete
    end

    ##
    # If +success?+ returns +false+ this method will return a message explaining the reason
    # for the failure.
    #
    def failure_message
      @attributes[ :error ]
    end

    ##
    # The +pages+ method returns the pages extracted from the given document. If the given
    # document was an image the result is an array with a single page. If no pages were recognized
    # the result is an empty array.
    #
    def pages
      ( @attributes[ :pages ] || [] ).map { | page | OcrPage.new( page ) }
    end

    # The +page_count+ attribute of the response.
    def page_count
      @attributes[ :page_count ]
    end

    # Returns a new +OcrResult+ combining these attributes with the given +attributes+; the
    # given attributes take precedence and +nil+ is treated as an empty hash.
    def merge( attributes )
      self.class.new( @attributes.merge( attributes || {} ) )
    end

  end
end
@@ -0,0 +1,23 @@
module Datalab

  ##
  # The +OcrTextLine+ class wraps the attributes of a single recognized line of text within
  # an +OcrPage+.
  #
  class OcrTextLine

    # +attributes+ is the text line hash with symbolized keys; it is copied, and +nil+ is
    # treated as an empty hash.
    def initialize( attributes )
      @attributes = attributes&.dup || {}
    end

    # The +text+ attribute of the line.
    def text
      @attributes[ :text ]
    end

    # The +confidence+ attribute of the line.
    def confidence
      @attributes[ :confidence ]
    end

    # The +polygon+ attribute of the line.
    def bounding_polygon
      @attributes[ :polygon ]
    end

    ##
    # The bounding rectangle of the line. This reads the +bbox+ attribute and, for
    # compatibility with earlier behavior, falls back to the +image_bbox+ attribute.
    #
    # NOTE(review): line-level boxes are expected under +bbox+ ( +image_bbox+ being the
    # page-level box ) - confirm against the Datalab OCR response schema.
    #
    def bounding_rectangle
      @attributes[ :bbox ] || @attributes[ :image_bbox ]
    end

  end
end
@@ -0,0 +1,38 @@
module Datalab

  ##
  # The +Request+ class encapsulates a request to the Datalab API. It is the shared
  # implementation behind MarkerRequest, OcrRequest and other request classes and is not
  # meant to be used directly.
  #
  class Request

    BASE_URI = 'https://www.datalab.to/api/v1'

    ##
    # Creates the request. Both arguments are optional: +connection+ ( a Faraday connection )
    # defaults to +Datalab.connection+ and +api_key+ defaults to +Datalab.api_key+.
    #
    # Raises +ArgumentError+ when no api key is given and none has been configured.
    #
    def initialize( connection: nil, api_key: nil )
      @connection = connection || Datalab.connection
      @api_key = api_key || Datalab.api_key
      return if @api_key

      raise ArgumentError, "An 'api_key' is required unless configured using 'Datalab.api_key'."
    end

  protected

    # Issues a POST to +uri+ with the given +body+, sending the api key in the 'X-Api-Key'
    # header. The block, if any, is handed to the connection.
    def post( uri, body, &block )
      @connection.post( uri, body, 'X-Api-Key' => @api_key, &block )
    end

    # Issues a GET to +uri+, setting the 'X-Api-Key' header on the request before yielding
    # the request to the block, if one was given.
    def get( uri, &block )
      @connection.get( uri ) do | request |
        request.headers[ 'X-Api-Key' ] = @api_key
        block&.call( request )
      end
    end

  end

end
@@ -0,0 +1,15 @@
module Datalab

  ##
  # The +ResponseMethods+ module extends a Faraday response, adding the +result+ method.
  #
  module ResponseMethods

    # Attaches +result+ to the given +response+ and extends the response with this module so
    # the result can be read back through +result+. Returns the response.
    def self.install( response, result )
      response.instance_variable_set( :@_datalab_result, result )
      response.extend( self )
    end

    # The result attached by +install+.
    def result
      @_datalab_result
    end

  end
end
data/lib/datalab.rb ADDED
@@ -0,0 +1,29 @@
1
+ require 'json'
2
+ require 'base64'
3
+ require 'uri'
4
+
5
+ require 'faraday'
6
+ require 'faraday/multipart'
7
+ require 'dynamic_schema'
8
+
9
+ require_relative 'datalab/error_result'
10
+ require_relative 'datalab/request'
11
+ require_relative 'datalab/response_methods'
12
+
13
+ require_relative 'datalab/marker_options'
14
+ require_relative 'datalab/marker_result'
15
+ require_relative 'datalab/marker_request'
16
+
17
+ require_relative 'datalab/ocr_text_line'
18
+ require_relative 'datalab/ocr_page'
19
+ require_relative 'datalab/ocr_options'
20
+ require_relative 'datalab/ocr_result'
21
+ require_relative 'datalab/ocr_request'
22
+
23
+ require_relative 'datalab/module_methods'
24
+
25
+ module Datalab
26
+ extend ModuleMethods
27
+ end
28
+
29
+
metadata ADDED
@@ -0,0 +1,149 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: datalab
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Kristoph Cichocki-Romanov
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2024-11-14 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: faraday
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.7'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: faraday-multipart
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: dynamicschema
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 1.0.0.beta04
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 1.0.0.beta04
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '3.13'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '3.13'
69
+ - !ruby/object:Gem::Dependency
70
+ name: debug
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1.9'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1.9'
83
+ - !ruby/object:Gem::Dependency
84
+ name: vcr
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '6.3'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '6.3'
97
+ description: The Datalab gem implements a lightweight interface to the Datalab API.
98
+ The Datalab API can convert a number of document formats, including PDF, Word and
99
+ Powerpoint to Markdown. In addition in offers sophisticate OCR, layout and line
100
+ detection for documents an images.
101
+ email:
102
+ - rubygems.org@kristoph.net
103
+ executables: []
104
+ extensions: []
105
+ extra_rdoc_files: []
106
+ files:
107
+ - LICENSE
108
+ - datalab.gemspec
109
+ - lib/datalab.rb
110
+ - lib/datalab/error_result.rb
111
+ - lib/datalab/marker_options.rb
112
+ - lib/datalab/marker_request.rb
113
+ - lib/datalab/marker_result.rb
114
+ - lib/datalab/module_methods.rb
115
+ - lib/datalab/ocr_options.rb
116
+ - lib/datalab/ocr_page.rb
117
+ - lib/datalab/ocr_request.rb
118
+ - lib/datalab/ocr_result.rb
119
+ - lib/datalab/ocr_text_line.rb
120
+ - lib/datalab/request.rb
121
+ - lib/datalab/response_methods.rb
122
+ homepage: https://github.com/EndlessInternational/datalab
123
+ licenses:
124
+ - MIT
125
+ metadata:
126
+ source_code_uri: https://github.com/EndlessInternational/datalab
127
+ bug_tracker_uri: https://github.com/EndlessInternational/datalab/issues
128
+ post_install_message:
129
+ rdoc_options: []
130
+ require_paths:
131
+ - lib
132
+ required_ruby_version: !ruby/object:Gem::Requirement
133
+ requirements:
134
+ - - ">="
135
+ - !ruby/object:Gem::Version
136
+ version: '3.0'
137
+ required_rubygems_version: !ruby/object:Gem::Requirement
138
+ requirements:
139
+ - - ">="
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
142
+ requirements: []
143
+ rubygems_version: 3.5.19
144
+ signing_key:
145
+ specification_version: 4
146
+ summary: The Datalab gem implements a lightweight interface to the Datalab API which
147
+ provides document to Markdown conversion as well as sophisticated OCR for documents
148
+ and images.
149
+ test_files: []