receipt_data_extraction 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 37d4fb35a9ca3dd44afee22b7e56c3a2e1be62de
4
+ data.tar.gz: cb0ebcba1f12c4f4f8433de6c0d7fe5df7b01b15
5
+ SHA512:
6
+ metadata.gz: 38af6da3c6612e984649d16b7b51ca27a441c6e37b9b06d8467799ec3d7f5a9dd06ea164da9cf173f7f034baaac5c5fabdb0f78d08419c27088dbecd75d5251e
7
+ data.tar.gz: d72d34844cf22e4d135776bab8f0ae2a10401c8152a78372b0096c86d1089eb0a1c14fb171ee0c938f6349306114d727e695f0e668697e212e6bc832bead98a1
@@ -0,0 +1,85 @@
1
require "date"
require "reg_exp"
require "google/cloud/vision"
3
+
4
+ module ReceiptDataExtraction
5
# Runs text detection over a receipt image through Google Cloud Vision.
class Extract
  # Sends +image+ to the Vision text-detection call and collects the text found.
  #
  # @param image handed over unchanged as the `image:` argument of
  #   `ImageAnnotator#text_detection`
  # @return [String] the description of the first text annotation of every
  #   response, concatenated in response order; "" when no response carries
  #   a text annotation
  def self.call_vision_api(image)
    image_annotator = Google::Cloud::Vision::ImageAnnotator.new
    response = image_annotator.text_detection(
      image: image,
      max_results: 1 # optional, defaults to 10
    )

    response.responses.each_with_object("".dup) do |res, text_detected|
      # A response with no detected text has no first annotation; skip it
      # rather than calling #description on nil.
      first_annotation = res.text_annotations[0]
      next if first_annotation.nil?

      text_detected << first_annotation.description
    end
  end
end
20
+
21
# Field extractors that operate on the plain text of a receipt.
#
# The patterns (Name_reg, Date_reg, Total_reg, Product_reg, Clean_text,
# Negative_values) are constants defined outside this class.
class Specific_data
  # Returns the first piece of +text+ when split on Name_reg.
  #
  # Side effect kept from the original implementation: the first occurrence
  # of that piece is removed from +text+ in place (skipped when +text+ is
  # frozen, where the removal would raise).
  #
  # @param text [String] receipt text
  # @return [String, nil] the shop name, or nil when the split yields nothing
  def self.get_shop_name(text)
    shop_name = text.split(Name_reg).first
    return unless shop_name

    text.sub!(shop_name, "") unless text.frozen?
    shop_name
  end

  # Returns the second and third pieces of +text+ (split on Name_reg) joined
  # by a single space.
  #
  # Side effect kept from the original implementation: the first occurrence
  # of the second piece is removed from +text+ in place (skipped when +text+
  # is frozen). The removal never contains a separator, so the third piece is
  # the same before and after it.
  #
  # @param text [String] receipt text
  # @return [String, nil] the address; only the second piece when there is no
  #   third one; nil when there is no second piece
  def self.get_address(text)
    pieces = text.split(Name_reg)
    address = pieces[1]
    return unless address

    text.sub!(address, "") unless text.frozen?
    [address, pieces[2]].compact.join(" ")
  end

  # Finds the first Date_reg match in +text+ and reformats it.
  #
  # @param text [String] receipt text
  # @return [String, nil] the date as "dd/mm/yyyy", or nil when nothing matches
  # @raise [ArgumentError] when the matched text is not a date Date.parse accepts
  def self.get_date(text)
    match = text.match(Date_reg)
    return unless match

    Date.parse(match[0]).strftime("%d/%m/%Y")
  end

  # Returns the largest amount captured by Total_reg anywhere in +text+.
  #
  # @param text [String] receipt text
  # @return [Float] the largest total found, 0.0 when there is none
  def self.get_total(text)
    amounts = text.scan(Total_reg).map do |captures|
      # Strip thousands separators first: "1,234.56".to_f would stop at the
      # comma and yield 1.0.
      captures[0].delete(",").to_f
    end
    ([0.0] + amounts).max
  end

  # Prints every Product_reg match that has a non-empty name and a price.
  # Quantity and line total are printed only when they were captured.
  #
  # @param text [String] receipt text
  # @return [String] +text+ itself (the value String#scan returns with a block)
  def self.get_products(text)
    text.scan(Product_reg) do |qtde, name_product, price, price_total|
      next if name_product == "" || price.nil?

      puts "-qtde: #{qtde}" unless qtde.nil?
      puts "-name_product: #{name_product}"
      puts "Price: #{price}"
      puts "Price_Total: #{price_total}" unless price_total.nil?
    end
  end

  # Returns the first element produced by splitting +text+ on Clean_text.
  #
  # @param text [String] receipt text
  # @return [String, nil] the leading part of the text, nil for empty text
  def self.get_data_before_total(text)
    text.split(Clean_text).first
  end

  # Remove all discounts/cancellations, i.e. every Negative_values match.
  # Class-level form, consistent with the other extractors.
  #
  # @param text [String] receipt text (not modified)
  # @return [String] a copy of +text+ without the negative amounts
  def self.remove_neg_values(text)
    text.gsub(Negative_values, "")
  end

  # Instance-level form kept so existing `Specific_data.new.remove_neg_values`
  # callers keep working.
  #
  # @param text [String] receipt text (not modified)
  # @return [String] a copy of +text+ without the negative amounts
  def remove_neg_values(text)
    self.class.remove_neg_values(text)
  end
end
78
+ end
79
+
80
+ #text_detected = ReceiptDataExtraction::Extract.call_vision_api('../spar.jpg')
81
+
82
+
83
+
84
+
85
+
data/lib/reg_exp.rb ADDED
@@ -0,0 +1,19 @@
1
# Regular expressions used to pick individual fields out of receipt text.
#
# Patterns that declare a named group rely on Ruby's rule that plain (...)
# groups stop capturing once a named group is present, so only the named
# groups appear in MatchData, String#scan and String#split results.
module ReceiptDataExtraction
  # Separator used to cut the receipt text into lines.
  Name_reg = /\n/

  #12/11/2010 or 11 jan 2011 or 12 JAN 2011 or 12 Jan 2011
  month_1 = "(J|j)an|(F|f)eb|(M|m)ar|(A|a)pr|(M|m)ay|(J|j)un|(J|j)ul|(A|a)ug|(S|s)ep|(O|o)ct|(N|n)ov|(D|d)ec"
  month_2 = "JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC"
  # Named groups: day, month, year. A date may also sit at the very start or
  # the very end of the text, hence \A and \z next to the whitespace.
  Date_reg = /((\A|\s)(?<day>(\d{1,2}))(\s|\/|\-)(?<month>((\d+)|(#{month_1})|(#{month_2})))(\s|\/|\-)(?<year>(\d{2,4}))(\s+|\z))/

  #Qty | Name | Price | Total
  quantity = /(?<qtde>(\d+))/
  item_name = /(?<name_product>(.*?))/
  price = /(€|EUR)?(?<price>(\d{1,3}\,)*\d{1,3}\.\d{2})/
  price_total = /(?<price_total>(\d{1,3}\,)*\d{1,3}\.\d{2})/
  # Named groups, in order: qtde (optional), name_product, price,
  # price_total (optional).
  Product_reg = /((#{quantity})\s+)?#{item_name}\s*#{price}(\s*#{price_total})?/

  # A negative amount, optionally preceded by a currency marker.
  Negative_values = /(€|EUR)?\-(?<price>(\d{1,3}\,)*\d{1,3}\.\d{2})/

  # The word total/Total/TOTAL followed by non-digits and an amount (named
  # group: amount). The match starts at the word itself for all three
  # spellings; a leading \s* on only one of them would let the Clean_text
  # lookahead succeed at the end of the previous line and swallow that line.
  Total_reg = /\b(total|Total|TOTAL)\b\D*\s*(?<amount>(\d{1,3}\,)*\d{1,3}\.\d{2})/

  # Same-line text that directly precedes a Total_reg match; used as a split
  # delimiter to keep what comes before the total.
  Clean_text = /.*?(?=#{Total_reg})/
end
metadata ADDED
@@ -0,0 +1,45 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: receipt_data_extraction
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Marcio Camargo
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-04-15 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: returns
14
+ email: marjoscam@comp.ufla.br
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/receipt_data_extraction.rb
20
+ - lib/reg_exp.rb
21
+ homepage: https://github.com/mjcamargo/receipt_data_extraction
22
+ licenses:
23
+ - MIT
24
+ metadata: {}
25
+ post_install_message:
26
+ rdoc_options: []
27
+ require_paths:
28
+ - lib
29
+ required_ruby_version: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ required_rubygems_version: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - ">="
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ requirements: []
40
+ rubyforge_project:
41
+ rubygems_version: 2.6.8
42
+ signing_key:
43
+ specification_version: 4
44
+ summary: Extract data from receipts using google vision
45
+ test_files: []