iguvium 0.8.0 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 50748a9a17471a1f0f58a108ee632344cafad4c0ee6ab6096dbdb297c53b1381
4
- data.tar.gz: 6e676513f8aaee937f2dbb28316452a0dcd38f850afdc31a5ef6685200189814
3
+ metadata.gz: f3c9f4727d6069408bb795a4754c1b3f7098a997b7b6b47a9e114c4c04602f17
4
+ data.tar.gz: f7490d4f57e2e845740713cddff88fd558070f7ab4481598bf08fc504c33802c
5
5
  SHA512:
6
- metadata.gz: 4b0c5fc257ae4e8b667f0611f708e744c806de1f319e016e42df3553273c0cda445fe23f29e76ed8ef980eaf4680c21ecd7dccdf4602b721e5731981eace3097
7
- data.tar.gz: 563368859fa3684b8baa54f25e788c91f76d46c71b56bfa271fd9d6db2b9e6f74ad4d5937488b42c5f0912fdcc77e09432c214fde7d5c76dca85884e594912cf
6
+ metadata.gz: b973456ea806aef36d4b8328ae2740f624e31bf169d4861d0c6e095133a2fe33e41780e11830c4f891b2d72204873e20225575130e59f5ab55fe6e496a9c0c45
7
+ data.tar.gz: 218edd15c127f9d705759d1f32dbedbcd68615d1c4d77d4ca8e63b0d0d12801c759444cc4d4967c81e3cc4d88b4a90fb76792794d69b29a25395561bc78eaf0e
data/README.md CHANGED
@@ -60,6 +60,10 @@ And then execute:
60
60
  Or install it yourself as:
61
61
 
62
62
  $ gem install iguvium
63
+
64
+ If you're not a developer and have a Mac, you may have only the default Ruby installation and no development tools installed.
65
+
66
+ In this case, run `xcode-select --install` first, then install Iguvium as admin: `sudo gem install iguvium`
63
67
 
64
68
  ## Usage
65
69
 
@@ -85,10 +89,12 @@ iguvium filename.pdf [options]
85
89
  -p, --pages page numbers, comma-separated, no spaces
86
90
  -i, --images use pictures in pdf (usually a bad idea)
87
91
  -n, --newlines keep newlines
92
+ -t, --text extract full page text instead of tables
88
93
  --verbose verbose output
89
94
  ```
90
95
 
91
- Given a filename, it generates CSV files for the tables detected
96
+ Given a filename, it generates CSV files for the tables detected or, with the `-t` option,
97
+ just the page text. The latter is useful for whitespace-separated fixed-width tables.
92
98
 
93
99
  ## Implementation details
94
100
  There are usually no actual tables in PDFs, only characters with coordinates,
data/exe/iguvium CHANGED
@@ -9,6 +9,7 @@ opts = Slop.parse { |o|
9
9
  o.array '-p', '--pages', 'page numbers, comma-separated, no spaces'
10
10
  o.bool '-i', '--images', 'use pictures in pdf (usually a bad idea)'
11
11
  o.bool '-n', '--newlines', 'keep newlines'
12
+ o.bool '-t', '--text', 'extract full page text instead of tables'
12
13
  o.bool '--verbose', 'verbose output'
13
14
  o.on '--version', 'print the version' do
14
15
  puts Iguvium::VERSION
@@ -36,6 +37,17 @@ page_numbers = pages.count.times.to_a if page_numbers.empty?
36
37
  # puts page_numbers.inspect
37
38
  # puts opts.to_hash.inspect
38
39
 
40
+ if opts[:text]
41
+ page_numbers.each do |number|
42
+ print "Extracting page #{number + 1}... "
43
+ txt = pages[number].text
44
+ txt_file = "#{path.gsub(/\.pdf$/, '')}_page_#{number + 1}.txt"
45
+ puts "Saving #{File.expand_path txt_file}"
46
+ File.write txt_file, txt
47
+ end
48
+ exit
49
+ end
50
+
39
51
  page_numbers.each do |number|
40
52
  print "Extracting page #{number + 1}... "
41
53
  tables = pages[number].extract_tables!(images: opts[:images])
@@ -45,7 +57,7 @@ page_numbers.each do |number|
45
57
  csv = table.to_a(newlines: opts[:newlines]).map(&:to_csv).join
46
58
  next if csv.empty?
47
59
  csv_file = "#{path.gsub(/\.pdf$/, '')}_page_#{number + 1}_table_#{i}.csv"
48
- puts "Saving #{File.expand_path(csv_file, __dir__)}"
60
+ puts "Saving #{File.expand_path csv_file}"
49
61
  File.write csv_file, csv
50
62
  end
51
63
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Iguvium
4
- VERSION = '0.8.0'
4
+ VERSION = '0.8.1'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: iguvium
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.0
4
+ version: 0.8.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dima Ermilov
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-11-19 00:00:00.000000000 Z
11
+ date: 2018-11-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: pdf-reader