iguvium 0.9.0 → 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cf00782b6d5ec4d4e06d12ce386ff8ece314f8ba896bb70117232ec100ec5cdb
4
- data.tar.gz: 544dbce4b3cfacd303f711e774284cd497b9b3dcdc8de0590b4825f84ff0e399
3
+ metadata.gz: 59643f281def94149de22d6b858f1f90f4b4b05289ef4f3ef13f146e4621f6c9
4
+ data.tar.gz: 7f10445415eadcc746b8d21050443595ecf127c7f6856a8b2962f8d072825447
5
5
  SHA512:
6
- metadata.gz: b6340302943733d007a37e16206337c712646199ab9133699d328cb44c4a1ea6c1e4f65fb61c559467b018d1232ccd35c03e48b760f67645292b66dbc2640ed1
7
- data.tar.gz: 0b9dcce49fc880a1fa7a7d0b512e3037512800dd4531caad230d788cb5297647190591fd27057e0216d92ed32465b9b60bbbc38eb34d68c6a324f5f06edb6d43
6
+ metadata.gz: 7d3899e42a5bcaa6359554ff96d803f4a09acdacbb53855887bdedfda9245604cf24054a3588508d58222cf4c7783d031b36bff9c6e3e0cac2476c6552a54dc4
7
+ data.tar.gz: 8b6011b52aa2232597d1dea4ef087d36d4713879a0a6c76aba3849337f348329357499e8a5d8c3ec9ace4346e64a058d5b6fd415626b6553dfa2f29f6d516278
@@ -2,6 +2,11 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [0.9.1] - 2019-05-30
6
+ ### Fixed
7
+ - Rare `undefined method 'begin' for nil:NilClass` error fixed
8
+ - Remove Iguvium::Table#to_a result caching
9
+
5
10
  ## [0.9.0] - 2018-12-07
6
11
  ### Added
7
12
  - Open cells rendering added. Tables like this are now processed correctly:
@@ -11,8 +11,8 @@ Gem::Specification.new do |spec|
11
11
  spec.email = ['dima@scriptangle.com']
12
12
 
13
13
  spec.summary = 'Extract tables from PDF as a structured info'
14
- spec.description = 'Extract tables from PDF as a structured info. Uses ghostscript to print pdf to image, \
15
- then recognizes table separators optically. No OpenCV or other heavy dependencies'
14
+ spec.description = "Extract tables from PDF as a structured info. Uses ghostscript to print pdf \
15
+ to image, then recognizes table separators optically. No OpenCV or other heavy dependencies"
16
16
  spec.homepage = 'https://github.com/adworse/iguvium'
17
17
  spec.license = 'MIT'
18
18
 
@@ -29,9 +29,9 @@ then recognizes table separators optically. No OpenCV or other heavy dependencie
29
29
 
30
30
  spec.require_paths = ['lib']
31
31
 
32
- spec.add_dependency 'pdf-reader', '~> 2.1'
33
32
  spec.add_dependency 'convolver-light', '~> 0.3.1'
34
33
  spec.add_dependency 'oily_png', '~> 1.2'
34
+ spec.add_dependency 'pdf-reader', '~> 2.1'
35
35
  spec.add_dependency 'slop', '~> 4.2'
36
36
 
37
37
  spec.add_development_dependency 'bundler', '~> 1.16'
@@ -116,11 +116,4 @@ module Iguvium
116
116
  end
117
117
  end
118
118
 
119
- # TODO: 4) Add options like maybe image thresholding
120
- #
121
- # TODO: 6) 0.9 - version capable of reading tables with open outer cells, like this:
122
- # __|____|_______|_____|
123
- # __|____|_______|_____|
124
- # __|____|_______|_____|
125
- #
126
119
  # TODO: 7) 1.0 - in addition it should deal with merged cells (move result to the upper left cell).
@@ -75,7 +75,10 @@ module Iguvium
75
75
  recognized = CV.new(image).recognize
76
76
  @lines = recognized[:lines]
77
77
  @boxes = recognized[:boxes].reject { |box| box_empty?(box) }
78
- @tables = @boxes.map { |box| Table.new(box, self) }.reverse
78
+ @tables = @boxes
79
+ .map { |box| Table.new(box, self) }
80
+ .reject { |table| table.grid[:rows].empty? || table.grid[:columns].empty? }
81
+ .reverse
79
82
  self
80
83
  end
81
84
 
@@ -32,17 +32,16 @@ module Iguvium
32
32
  # @return [Array] 2D array of strings (content of table's cells)
33
33
  #
34
34
  def to_a(newlines: false, phrases: true)
35
- @to_a ||=
36
- grid[:rows]
35
+ grid[:rows]
37
36
  .reverse
38
37
  .map { |row|
39
- grid[:columns].map do |column|
40
- render(
41
- phrases ? words_inside(column, row) : chars_inside(column, row),
42
- newlines: newlines
43
- )
44
- end
45
- }
38
+ grid[:columns].map do |column|
39
+ render(
40
+ phrases ? words_inside(column, row) : chars_inside(column, row),
41
+ newlines: newlines
42
+ )
43
+ end
44
+ }
46
45
  end
47
46
 
48
47
  # def width
@@ -61,6 +60,14 @@ module Iguvium
61
60
  # @floorless
62
61
  # end
63
62
 
63
+ def grid
64
+ @grid ||=
65
+ {
66
+ rows: lines_to_ranges(lines[:horizontal]),
67
+ columns: lines_to_ranges(lines[:vertical])
68
+ }
69
+ end
70
+
64
71
  private
65
72
 
66
73
  attr_reader :page, :lines, :box
@@ -69,8 +76,8 @@ module Iguvium
69
76
  # and adds rows and/or columns if necessary.
70
77
  # @return [Iguvium::Table] with added open-cell rows and columns
71
78
  def heal
72
- heal_rows
73
- heal_cols
79
+ heal_rows unless grid[:rows].empty?
80
+ heal_cols unless grid[:columns].empty?
74
81
  self
75
82
  end
76
83
 
@@ -89,7 +96,6 @@ module Iguvium
89
96
  end
90
97
 
91
98
  def heal_rows
92
- # TODO: shrink box (like `box.last.end - 2`)
93
99
  roofrow = box.last.begin..grid[:rows].first.begin
94
100
  floorrow = grid[:rows].last.end..box.last.end
95
101
  if chars_inside(box.first, roofrow).any?
@@ -125,18 +131,7 @@ module Iguvium
125
131
  }
126
132
  end
127
133
 
128
- def grid
129
- return @grid if @grid
130
-
131
- @grid =
132
- {
133
- rows: lines_to_ranges(lines[:horizontal]),
134
- columns: lines_to_ranges(lines[:vertical])
135
- }
136
- end
137
-
138
134
  def lines_to_ranges(lines)
139
- # TODO: extend box for the sake of lines select
140
135
  lines.select { |line| line_in_box?(line, wide_box) }
141
136
  .map { |line| line.first.is_a?(Numeric) ? line.first : line.last }
142
137
  .sort
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Iguvium
4
- VERSION = '0.9.0'
4
+ VERSION = '0.9.1'
5
5
  end
metadata CHANGED
@@ -1,57 +1,57 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: iguvium
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.0
4
+ version: 0.9.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dima Ermilov
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-12-07 00:00:00.000000000 Z
11
+ date: 2019-05-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: pdf-reader
14
+ name: convolver-light
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '2.1'
19
+ version: 0.3.1
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '2.1'
26
+ version: 0.3.1
27
27
  - !ruby/object:Gem::Dependency
28
- name: convolver-light
28
+ name: oily_png
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 0.3.1
33
+ version: '1.2'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 0.3.1
40
+ version: '1.2'
41
41
  - !ruby/object:Gem::Dependency
42
- name: oily_png
42
+ name: pdf-reader
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '1.2'
47
+ version: '2.1'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '1.2'
54
+ version: '2.1'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: slop
57
57
  requirement: !ruby/object:Gem::Requirement
@@ -108,9 +108,9 @@ dependencies:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
110
  version: '3.0'
111
- description: |-
112
- Extract tables from PDF as a structured info. Uses ghostscript to print pdf to image, \
113
- then recognizes table separators optically. No OpenCV or other heavy dependencies
111
+ description: Extract tables from PDF as a structured info. Uses ghostscript to print
112
+ pdf to image, then recognizes table separators optically. No OpenCV or other heavy
113
+ dependencies
114
114
  email:
115
115
  - dima@scriptangle.com
116
116
  executables: