pdf-extract 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Binary file
@@ -162,12 +162,14 @@ module PdfExtract
162
162
 
163
163
  def self.numeric_sequence? pdf, content
164
164
  last_n = -1
165
+ first_n = -1
165
166
  seq_count = 0
166
167
  content.scan /\d+/ do |m|
167
168
  # Avoid misinterpreting years as sequence
168
169
  if m.to_i < pdf.settings[:max_reference_order]
169
170
  if last_n == -1
170
171
  last_n = m.to_i
172
+ first_n = m.to_i if first_n == -1
171
173
  elsif last_n.next == m.to_i
172
174
  last_n = last_n.next
173
175
  seq_count = seq_count.next
@@ -175,7 +177,10 @@ module PdfExtract
175
177
  end
176
178
  end
177
179
 
178
- seq_count >= pdf.settings[:min_sequence_count]
180
+ # Sequence must be long enough and first number of sequence
181
+ # must appear near the very start of content.
182
+ large_enough = seq_count >= pdf.settings[:min_sequence_count]
183
+ large_enough && content[0..30] =~ /#{first_n.to_s}/
179
184
  end
180
185
 
181
186
  def self.include_in pdf
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-extract
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2012-04-16 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: pdf-reader
16
- requirement: &70168030043520 !ruby/object:Gem::Requirement
16
+ requirement: &70203404883800 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 1.1.0
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70168030043520
24
+ version_requirements: *70203404883800
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: nokogiri
27
- requirement: &70168030042660 !ruby/object:Gem::Requirement
27
+ requirement: &70203404882720 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 1.5.0
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *70168030042660
35
+ version_requirements: *70203404882720
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: prawn
38
- requirement: &70168030041940 !ruby/object:Gem::Requirement
38
+ requirement: &70203404880220 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: 0.11.1
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *70168030041940
46
+ version_requirements: *70203404880220
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: sqlite3
49
- requirement: &70168030041260 !ruby/object:Gem::Requirement
49
+ requirement: &70203404877980 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 1.3.4
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *70168030041260
57
+ version_requirements: *70203404877980
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: commander
60
- requirement: &70168030040600 !ruby/object:Gem::Requirement
60
+ requirement: &70203404877160 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 4.0.4
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *70168030040600
68
+ version_requirements: *70203404877160
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: json
71
- requirement: &70168030040000 !ruby/object:Gem::Requirement
71
+ requirement: &70203404966560 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: 1.5.1
77
77
  type: :runtime
78
78
  prerelease: false
79
- version_requirements: *70168030040000
79
+ version_requirements: *70203404966560
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: libsvm-ruby-swig
82
- requirement: &70168030039440 !ruby/object:Gem::Requirement
82
+ requirement: &70203404965180 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ! '>='
@@ -87,7 +87,7 @@ dependencies:
87
87
  version: 0.4.0
88
88
  type: :runtime
89
89
  prerelease: false
90
- version_requirements: *70168030039440
90
+ version_requirements: *70203404965180
91
91
  description:
92
92
  email:
93
93
  - kward@crossref.org
@@ -104,6 +104,7 @@ files:
104
104
  - bin/test4.mask.pdf
105
105
  - bin/test5.mask.pdf
106
106
  - bin/test6.mask.pdf
107
+ - bin/test9.mask.pdf
107
108
  - bin/tmp.txt
108
109
  - bin/train.rb
109
110
  - lib/analysis/columns.rb