pdftdx 0.2.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/pdftdx/parser.rb +3 -1
- data/lib/pdftdx/version.rb +1 -1
- metadata +1 -2
- data/bin/pdftdx +0 -26
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b6ac984f258036c4d20985529d0fdcda9fd3254f
|
4
|
+
data.tar.gz: 9b325fba42e742f0521317f4202a8b2a783114da
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0ee08da850b0cb3ee593c7bbcbb0dd4f0d1244619e512018a83964fa103797d108ba2a4f5eca28f8e19ff2901c2d26d20bf36ca26d3f4787cd20a6286ef50513
|
7
|
+
data.tar.gz: 8d230f85707d4820ce341222e95a6f71fb553a3d7d57b7622a70b8668a966f8e4626e37538a2fe563855c0c3c50cdd846563ba3f6408318d0c39879c9ab2d710
|
data/Gemfile.lock
CHANGED
data/lib/pdftdx/parser.rb
CHANGED
@@ -59,6 +59,8 @@ module PDFTDX
|
|
59
59
|
|
60
60
|
# DEBUG
|
61
61
|
puts "=============> #{table}"
|
62
|
+
|
63
|
+
table
|
62
64
|
end
|
63
65
|
|
64
66
|
# HTML Filter
|
@@ -74,7 +76,7 @@ module PDFTDX
|
|
74
76
|
|
75
77
|
# Collect & Process File Data
|
76
78
|
off = 0
|
77
|
-
|
79
|
+
process_data page_data.collect { |_idx, page| off = off + PAGE_OFF; page.select { |l| LINE_REGEX =~ l }.collect { |l| LINE_REGEX.match l }.collect { |d| { top: off + d[1].to_i, left: d[2].to_i, data: hfilter(coder.decode(d[3])) } } }.flatten
|
78
80
|
end
|
79
81
|
end
|
80
82
|
end
|
data/lib/pdftdx/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdftdx
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eresse
|
@@ -72,7 +72,6 @@ files:
|
|
72
72
|
- LICENSE.txt
|
73
73
|
- README.md
|
74
74
|
- Rakefile
|
75
|
-
- bin/pdftdx
|
76
75
|
- lib/pdftdx.rb
|
77
76
|
- lib/pdftdx/parser.rb
|
78
77
|
- lib/pdftdx/version.rb
|
data/bin/pdftdx
DELETED
@@ -1,26 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# PDF Table Data Extractor
|
4
|
-
# by Eresse <eresse@eresse.net>
|
5
|
-
|
6
|
-
# Internal Includes
|
7
|
-
require 'pdftdx'
|
8
|
-
|
9
|
-
# Usage
|
10
|
-
def usage
|
11
|
-
puts "Usage: #{$0} <PDF_FILE>"
|
12
|
-
exit 1
|
13
|
-
end
|
14
|
-
|
15
|
-
# Main
|
16
|
-
def main args
|
17
|
-
|
18
|
-
# Check Args
|
19
|
-
usage unless args.length == 1
|
20
|
-
|
21
|
-
# Extract Data from provided PDF File\
|
22
|
-
PDFTDX.extract_data args[0]
|
23
|
-
end
|
24
|
-
|
25
|
-
# Call Main
|
26
|
-
main ARGV
|