pdf-extract 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/test9.mask.pdf +0 -0
- data/lib/references/references.rb +6 -1
- metadata +16 -15
data/bin/test9.mask.pdf
ADDED
Binary file
|
@@ -162,12 +162,14 @@ module PdfExtract
|
|
162
162
|
|
163
163
|
def self.numeric_sequence? pdf, content
|
164
164
|
last_n = -1
|
165
|
+
first_n = -1
|
165
166
|
seq_count = 0
|
166
167
|
content.scan /\d+/ do |m|
|
167
168
|
# Avoid misinterpreting years as sequence
|
168
169
|
if m.to_i < pdf.settings[:max_reference_order]
|
169
170
|
if last_n == -1
|
170
171
|
last_n = m.to_i
|
172
|
+
first_n = m.to_i if first_n == -1
|
171
173
|
elsif last_n.next == m.to_i
|
172
174
|
last_n = last_n.next
|
173
175
|
seq_count = seq_count.next
|
@@ -175,7 +177,10 @@ module PdfExtract
|
|
175
177
|
end
|
176
178
|
end
|
177
179
|
|
178
|
-
|
180
|
+
# Sequence must be long enough and first number of sequence
|
181
|
+
# must appear near the very start of content.
|
182
|
+
large_enough = seq_count >= pdf.settings[:min_sequence_count]
|
183
|
+
large_enough && content[0..30] =~ /#{first_n.to_s}/
|
179
184
|
end
|
180
185
|
|
181
186
|
def self.include_in pdf
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-extract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2012-04-16 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: pdf-reader
|
16
|
-
requirement: &
|
16
|
+
requirement: &70203404883800 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 1.1.0
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70203404883800
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: nokogiri
|
27
|
-
requirement: &
|
27
|
+
requirement: &70203404882720 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 1.5.0
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70203404882720
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: prawn
|
38
|
-
requirement: &
|
38
|
+
requirement: &70203404880220 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 0.11.1
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *70203404880220
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: sqlite3
|
49
|
-
requirement: &
|
49
|
+
requirement: &70203404877980 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: 1.3.4
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *70203404877980
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: commander
|
60
|
-
requirement: &
|
60
|
+
requirement: &70203404877160 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: 4.0.4
|
66
66
|
type: :runtime
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *70203404877160
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: json
|
71
|
-
requirement: &
|
71
|
+
requirement: &70203404966560 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ! '>='
|
@@ -76,10 +76,10 @@ dependencies:
|
|
76
76
|
version: 1.5.1
|
77
77
|
type: :runtime
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *70203404966560
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
81
|
name: libsvm-ruby-swig
|
82
|
-
requirement: &
|
82
|
+
requirement: &70203404965180 !ruby/object:Gem::Requirement
|
83
83
|
none: false
|
84
84
|
requirements:
|
85
85
|
- - ! '>='
|
@@ -87,7 +87,7 @@ dependencies:
|
|
87
87
|
version: 0.4.0
|
88
88
|
type: :runtime
|
89
89
|
prerelease: false
|
90
|
-
version_requirements: *
|
90
|
+
version_requirements: *70203404965180
|
91
91
|
description:
|
92
92
|
email:
|
93
93
|
- kward@crossref.org
|
@@ -104,6 +104,7 @@ files:
|
|
104
104
|
- bin/test4.mask.pdf
|
105
105
|
- bin/test5.mask.pdf
|
106
106
|
- bin/test6.mask.pdf
|
107
|
+
- bin/test9.mask.pdf
|
107
108
|
- bin/tmp.txt
|
108
109
|
- bin/train.rb
|
109
110
|
- lib/analysis/columns.rb
|