iguvium 0.8.0 → 0.8.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +7 -1
- data/exe/iguvium +13 -1
- data/lib/iguvium/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f3c9f4727d6069408bb795a4754c1b3f7098a997b7b6b47a9e114c4c04602f17
|
4
|
+
data.tar.gz: f7490d4f57e2e845740713cddff88fd558070f7ab4481598bf08fc504c33802c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b973456ea806aef36d4b8328ae2740f624e31bf169d4861d0c6e095133a2fe33e41780e11830c4f891b2d72204873e20225575130e59f5ab55fe6e496a9c0c45
|
7
|
+
data.tar.gz: 218edd15c127f9d705759d1f32dbedbcd68615d1c4d77d4ca8e63b0d0d12801c759444cc4d4967c81e3cc4d88b4a90fb76792794d69b29a25395561bc78eaf0e
|
data/README.md
CHANGED
@@ -60,6 +60,10 @@ And then execute:
|
|
60
60
|
Or install it yourself as:
|
61
61
|
|
62
62
|
$ gem install iguvium
|
63
|
+
|
64
|
+
If you're not a developer and have a Mac, you may have the default Ruby installation and no development tools installed.
|
65
|
+
|
66
|
+
In this case, run `xcode-select --install` first, and then install Iguvium as admin: `sudo gem install iguvium`.
|
63
67
|
|
64
68
|
## Usage
|
65
69
|
|
@@ -85,10 +89,12 @@ iguvium filename.pdf [options]
|
|
85
89
|
-p, --pages page numbers, comma-separated, no spaces
|
86
90
|
-i, --images use pictures in pdf (usually a bad idea)
|
87
91
|
-n, --newlines keep newlines
|
92
|
+
-t, --text extract full page text instead of tables
|
88
93
|
--verbose verbose output
|
89
94
|
```
|
90
95
|
|
91
|
-
Given a filename, it generates CSV files for the tables detected
|
96
|
+
Given a filename, it generates CSV files for the tables detected or, with the `-t` option,
|
97
|
+
just the page text. The latter is useful for whitespace-separated fixed-width tables.
|
92
98
|
|
93
99
|
## Implementation details
|
94
100
|
There are usually no actual tables in PDFs, only characters with coordinates,
|
data/exe/iguvium
CHANGED
@@ -9,6 +9,7 @@ opts = Slop.parse { |o|
|
|
9
9
|
o.array '-p', '--pages', 'page numbers, comma-separated, no spaces'
|
10
10
|
o.bool '-i', '--images', 'use pictures in pdf (usually a bad idea)'
|
11
11
|
o.bool '-n', '--newlines', 'keep newlines'
|
12
|
+
o.bool '-t', '--text', 'extract full page text instead of tables'
|
12
13
|
o.bool '--verbose', 'verbose output'
|
13
14
|
o.on '--version', 'print the version' do
|
14
15
|
puts Iguvium::VERSION
|
@@ -36,6 +37,17 @@ page_numbers = pages.count.times.to_a if page_numbers.empty?
|
|
36
37
|
# puts page_numbers.inspect
|
37
38
|
# puts opts.to_hash.inspect
|
38
39
|
|
40
|
+
if opts[:text]
|
41
|
+
page_numbers.each do |number|
|
42
|
+
print "Extracting page #{number + 1}... "
|
43
|
+
txt = pages[number].text
|
44
|
+
txt_file = "#{path.gsub(/\.pdf$/, '')}_page_#{number + 1}.txt"
|
45
|
+
puts "Saving #{File.expand_path txt_file}"
|
46
|
+
File.write txt_file, txt
|
47
|
+
end
|
48
|
+
exit
|
49
|
+
end
|
50
|
+
|
39
51
|
page_numbers.each do |number|
|
40
52
|
print "Extracting page #{number + 1}... "
|
41
53
|
tables = pages[number].extract_tables!(images: opts[:images])
|
@@ -45,7 +57,7 @@ page_numbers.each do |number|
|
|
45
57
|
csv = table.to_a(newlines: opts[:newlines]).map(&:to_csv).join
|
46
58
|
next if csv.empty?
|
47
59
|
csv_file = "#{path.gsub(/\.pdf$/, '')}_page_#{number + 1}_table_#{i}.csv"
|
48
|
-
puts "Saving #{File.expand_path
|
60
|
+
puts "Saving #{File.expand_path csv_file}"
|
49
61
|
File.write csv_file, csv
|
50
62
|
end
|
51
63
|
end
|
data/lib/iguvium/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: iguvium
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.0
|
4
|
+
version: 0.8.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dima Ermilov
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-11-
|
11
|
+
date: 2018-11-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: pdf-reader
|