iguvium 0.8.0 → 0.8.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 50748a9a17471a1f0f58a108ee632344cafad4c0ee6ab6096dbdb297c53b1381
4
- data.tar.gz: 6e676513f8aaee937f2dbb28316452a0dcd38f850afdc31a5ef6685200189814
3
+ metadata.gz: f3c9f4727d6069408bb795a4754c1b3f7098a997b7b6b47a9e114c4c04602f17
4
+ data.tar.gz: f7490d4f57e2e845740713cddff88fd558070f7ab4481598bf08fc504c33802c
5
5
  SHA512:
6
- metadata.gz: 4b0c5fc257ae4e8b667f0611f708e744c806de1f319e016e42df3553273c0cda445fe23f29e76ed8ef980eaf4680c21ecd7dccdf4602b721e5731981eace3097
7
- data.tar.gz: 563368859fa3684b8baa54f25e788c91f76d46c71b56bfa271fd9d6db2b9e6f74ad4d5937488b42c5f0912fdcc77e09432c214fde7d5c76dca85884e594912cf
6
+ metadata.gz: b973456ea806aef36d4b8328ae2740f624e31bf169d4861d0c6e095133a2fe33e41780e11830c4f891b2d72204873e20225575130e59f5ab55fe6e496a9c0c45
7
+ data.tar.gz: 218edd15c127f9d705759d1f32dbedbcd68615d1c4d77d4ca8e63b0d0d12801c759444cc4d4967c81e3cc4d88b4a90fb76792794d69b29a25395561bc78eaf0e
data/README.md CHANGED
@@ -60,6 +60,10 @@ And then execute:
60
60
  Or install it yourself as:
61
61
 
62
62
  $ gem install iguvium
63
+
64
+ If you're not a developer and have a Mac, you may have the default Ruby installation and no development tools installed.
65
+
66
+ In this case, run `xcode-select --install` beforehand, and after that install Iguvium as admin: `sudo gem install iguvium`
63
67
 
64
68
  ## Usage
65
69
 
@@ -85,10 +89,12 @@ iguvium filename.pdf [options]
85
89
  -p, --pages page numbers, comma-separated, no spaces
86
90
  -i, --images use pictures in pdf (usually a bad idea)
87
91
  -n, --newlines keep newlines
92
+ -t, --text extract full page text instead of tables
88
93
  --verbose verbose output
89
94
  ```
90
95
 
91
- Given a filename, it generates CSV files for the tables detected
96
+ Given a filename, it generates CSV files for the tables detected or, with the `-t` option,
97
+ just page text. The latter is useful in case of whitespace-separated fixed-width tables.
92
98
 
93
99
  ## Implementation details
94
100
  There are usually no actual tables in PDFs, only characters with coordinates,
data/exe/iguvium CHANGED
@@ -9,6 +9,7 @@ opts = Slop.parse { |o|
9
9
  o.array '-p', '--pages', 'page numbers, comma-separated, no spaces'
10
10
  o.bool '-i', '--images', 'use pictures in pdf (usually a bad idea)'
11
11
  o.bool '-n', '--newlines', 'keep newlines'
12
+ o.bool '-t', '--text', 'extract full page text instead of tables'
12
13
  o.bool '--verbose', 'verbose output'
13
14
  o.on '--version', 'print the version' do
14
15
  puts Iguvium::VERSION
@@ -36,6 +37,17 @@ page_numbers = pages.count.times.to_a if page_numbers.empty?
36
37
  # puts page_numbers.inspect
37
38
  # puts opts.to_hash.inspect
38
39
 
40
+ if opts[:text]
41
+ page_numbers.each do |number|
42
+ print "Extracting page #{number + 1}... "
43
+ txt = pages[number].text
44
+ txt_file = "#{path.gsub(/\.pdf$/, '')}_page_#{number + 1}.txt"
45
+ puts "Saving #{File.expand_path txt_file}"
46
+ File.write txt_file, txt
47
+ end
48
+ exit
49
+ end
50
+
39
51
  page_numbers.each do |number|
40
52
  print "Extracting page #{number + 1}... "
41
53
  tables = pages[number].extract_tables!(images: opts[:images])
@@ -45,7 +57,7 @@ page_numbers.each do |number|
45
57
  csv = table.to_a(newlines: opts[:newlines]).map(&:to_csv).join
46
58
  next if csv.empty?
47
59
  csv_file = "#{path.gsub(/\.pdf$/, '')}_page_#{number + 1}_table_#{i}.csv"
48
- puts "Saving #{File.expand_path(csv_file, __dir__)}"
60
+ puts "Saving #{File.expand_path csv_file}"
49
61
  File.write csv_file, csv
50
62
  end
51
63
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Iguvium
4
- VERSION = '0.8.0'
4
+ VERSION = '0.8.1'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: iguvium
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.0
4
+ version: 0.8.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dima Ermilov
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-11-19 00:00:00.000000000 Z
11
+ date: 2018-11-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: pdf-reader