pdftdx 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 39b1feac0f38ec7816d8b192b3c1d9617889abf9
4
- data.tar.gz: e36b4fc10906501a6ad5afa438e6f01a264e0005
3
+ metadata.gz: b6ac984f258036c4d20985529d0fdcda9fd3254f
4
+ data.tar.gz: 9b325fba42e742f0521317f4202a8b2a783114da
5
5
  SHA512:
6
- metadata.gz: 12dfb86178338410ca972b03ddb871084d5a07b4eb5bede62727ae30655f9aa5636e89c0374b9d6902fb603ca9ac5538225be715086eacb8ab83edcc7c08d82e
7
- data.tar.gz: e50c7770e11bfa584a84a6adecabbbb838219075d62306962073efa575a75867d445ff9429e34912f42d023a133ba62130d11d2327dd0235da3cbb04a44df648
6
+ metadata.gz: 0ee08da850b0cb3ee593c7bbcbb0dd4f0d1244619e512018a83964fa103797d108ba2a4f5eca28f8e19ff2901c2d26d20bf36ca26d3f4787cd20a6286ef50513
7
+ data.tar.gz: 8d230f85707d4820ce341222e95a6f71fb553a3d7d57b7622a70b8668a966f8e4626e37538a2fe563855c0c3c50cdd846563ba3f6408318d0c39879c9ab2d710
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- pdftdx (0.1.0)
4
+ pdftdx (0.2.0)
5
5
  pdftohtml
6
6
 
7
7
  GEM
data/lib/pdftdx/parser.rb CHANGED
@@ -59,6 +59,8 @@ module PDFTDX
59
59
 
60
60
  # DEBUG
61
61
  puts "=============> #{table}"
62
+
63
+ table
62
64
  end
63
65
 
64
66
  # HTML Filter
@@ -74,7 +76,7 @@ module PDFTDX
74
76
 
75
77
  # Collect & Process File Data
76
78
  off = 0
77
- process page_data.collect { |_idx, page| off = off + PAGE_OFF; page.select { |l| LINE_REGEX =~ l }.collect { |l| LINE_REGEX.match l }.collect { |d| { top: off + d[1].to_i, left: d[2].to_i, data: hfilter(coder.decode(d[3])) } } }.flatten
79
+ process_data page_data.collect { |_idx, page| off = off + PAGE_OFF; page.select { |l| LINE_REGEX =~ l }.collect { |l| LINE_REGEX.match l }.collect { |d| { top: off + d[1].to_i, left: d[2].to_i, data: hfilter(coder.decode(d[3])) } } }.flatten
78
80
  end
79
81
  end
80
82
  end
@@ -5,5 +5,5 @@
5
5
  module PDFTDX
6
6
 
7
7
  # Version
8
- VERSION = '0.2.0'
8
+ VERSION = '0.3.1'
9
9
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdftdx
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eresse
@@ -72,7 +72,6 @@ files:
72
72
  - LICENSE.txt
73
73
  - README.md
74
74
  - Rakefile
75
- - bin/pdftdx
76
75
  - lib/pdftdx.rb
77
76
  - lib/pdftdx/parser.rb
78
77
  - lib/pdftdx/version.rb
data/bin/pdftdx DELETED
@@ -1,26 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # PDF Table Data Extractor
4
- # by Eresse <eresse@eresse.net>
5
-
6
- # Internal Includes
7
- require 'pdftdx'
8
-
9
- # Usage
10
- def usage
11
- puts "Usage: #{$0} <PDF_FILE>"
12
- exit 1
13
- end
14
-
15
- # Main
16
- def main args
17
-
18
- # Check Args
19
- usage unless args.length == 1
20
-
21
- # Extract Data from provided PDF File\
22
- PDFTDX.extract_data args[0]
23
- end
24
-
25
- # Call Main
26
- main ARGV