pdf-extract 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
@@ -162,12 +162,14 @@ module PdfExtract
162
162
 
163
163
  def self.numeric_sequence? pdf, content
164
164
  last_n = -1
165
+ first_n = -1
165
166
  seq_count = 0
166
167
  content.scan /\d+/ do |m|
167
168
  # Avoid misinterpreting years as sequence
168
169
  if m.to_i < pdf.settings[:max_reference_order]
169
170
  if last_n == -1
170
171
  last_n = m.to_i
172
+ first_n = m.to_i if first_n == -1
171
173
  elsif last_n.next == m.to_i
172
174
  last_n = last_n.next
173
175
  seq_count = seq_count.next
@@ -175,7 +177,10 @@ module PdfExtract
175
177
  end
176
178
  end
177
179
 
178
- seq_count >= pdf.settings[:min_sequence_count]
180
+ # Sequence must be long enough and first number of sequence
181
+ # must appear near the very start of content.
182
+ large_enough = seq_count >= pdf.settings[:min_sequence_count]
183
+ large_enough && content[0..30] =~ /#{first_n.to_s}/
179
184
  end
180
185
 
181
186
  def self.include_in pdf
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-extract
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2012-04-16 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: pdf-reader
16
- requirement: &70168030043520 !ruby/object:Gem::Requirement
16
+ requirement: &70203404883800 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 1.1.0
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70168030043520
24
+ version_requirements: *70203404883800
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: nokogiri
27
- requirement: &70168030042660 !ruby/object:Gem::Requirement
27
+ requirement: &70203404882720 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 1.5.0
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *70168030042660
35
+ version_requirements: *70203404882720
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: prawn
38
- requirement: &70168030041940 !ruby/object:Gem::Requirement
38
+ requirement: &70203404880220 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: 0.11.1
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *70168030041940
46
+ version_requirements: *70203404880220
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: sqlite3
49
- requirement: &70168030041260 !ruby/object:Gem::Requirement
49
+ requirement: &70203404877980 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 1.3.4
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *70168030041260
57
+ version_requirements: *70203404877980
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: commander
60
- requirement: &70168030040600 !ruby/object:Gem::Requirement
60
+ requirement: &70203404877160 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 4.0.4
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *70168030040600
68
+ version_requirements: *70203404877160
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: json
71
- requirement: &70168030040000 !ruby/object:Gem::Requirement
71
+ requirement: &70203404966560 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: 1.5.1
77
77
  type: :runtime
78
78
  prerelease: false
79
- version_requirements: *70168030040000
79
+ version_requirements: *70203404966560
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: libsvm-ruby-swig
82
- requirement: &70168030039440 !ruby/object:Gem::Requirement
82
+ requirement: &70203404965180 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ! '>='
@@ -87,7 +87,7 @@ dependencies:
87
87
  version: 0.4.0
88
88
  type: :runtime
89
89
  prerelease: false
90
- version_requirements: *70168030039440
90
+ version_requirements: *70203404965180
91
91
  description:
92
92
  email:
93
93
  - kward@crossref.org
@@ -104,6 +104,7 @@ files:
104
104
  - bin/test4.mask.pdf
105
105
  - bin/test5.mask.pdf
106
106
  - bin/test6.mask.pdf
107
+ - bin/test9.mask.pdf
107
108
  - bin/tmp.txt
108
109
  - bin/train.rb
109
110
  - lib/analysis/columns.rb