anystyle 1.2.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/HISTORY.md +8 -0
- data/lib/anystyle.rb +1 -0
- data/lib/anystyle/document.rb +2 -2
- data/lib/anystyle/feature/keyword.rb +23 -7
- data/lib/anystyle/feature/line.rb +1 -1
- data/lib/anystyle/feature/quotes.rb +4 -4
- data/lib/anystyle/feature/words.rb +4 -0
- data/lib/anystyle/finder.rb +13 -4
- data/lib/anystyle/format/csl.rb +17 -0
- data/lib/anystyle/normalizer/arxiv.rb +15 -0
- data/lib/anystyle/normalizer/container.rb +5 -1
- data/lib/anystyle/normalizer/locator.rb +9 -1
- data/lib/anystyle/normalizer/names.rb +1 -1
- data/lib/anystyle/normalizer/punctuation.rb +4 -2
- data/lib/anystyle/parser.rb +2 -1
- data/lib/anystyle/refs.rb +41 -20
- data/lib/anystyle/support/finder.mod +7089 -5838
- data/lib/anystyle/support/parser.mod +17212 -12817
- data/lib/anystyle/utils.rb +11 -7
- data/lib/anystyle/version.rb +1 -1
- data/res/finder/bb132pr2055.ttx +3 -3
- data/res/finder/bb408gp7470.ttx +3 -3
- data/res/finder/bc605xz1554.ttx +1 -1
- data/res/finder/bd040gx5718.ttx +3 -3
- data/res/finder/bd413nt2715.ttx +7 -7
- data/res/finder/bg599vt3743.ttx +2 -2
- data/res/parser/bad.xml +0 -55
- data/res/parser/core.xml +2113 -155
- data/res/parser/gold.xml +10428 -7117
- data/res/parser/good.xml +2 -1048
- data/res/parser/ugly.xml +2 -1393
- metadata +4 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
|
-
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 4a8c6471369e8969b190536c6f597742e6917197ce26009b1f7c7dcd1ec32168
|
|
4
|
+
data.tar.gz: 1f9af1e1337c47fda651b40fd19903d7474fe860b679bcafd832b932fc075947
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 504a133a3cefedeb24fc9c165e00c9f59f6dfee14b78fa0206b9eadb2d060e236b8e99fe211d94d8df1de9d9c93efc8d3149c931827c5df83bfa56524539ce55
|
|
7
|
+
data.tar.gz: 70a9e1c156fe996980610755971eb4feb899afd2a57e20cbd95c664618326385b88ef8c733268922977a7252b2fff47771ba69892f37eecc94227545c4e956b3
|
data/HISTORY.md
CHANGED
|
@@ -1,3 +1,11 @@
|
|
|
1
|
+
1.3.0 / 2018-09-18
|
|
2
|
+
==================
|
|
3
|
+
* Updated and improved normalizers and CSL format.
|
|
4
|
+
* Improved Chinese reference tokenization.
|
|
5
|
+
* Added option to customize pdftotext path.
|
|
6
|
+
* Improved Finder reference line joining.
|
|
7
|
+
* Improved Finder model; training sets.
|
|
8
|
+
* Improved Parser model; training sets.
|
|
1
9
|
1.2.1 / 2018-08-17
|
|
2
10
|
==================
|
|
3
11
|
* Added check and train commands to CLI.
|
data/lib/anystyle.rb
CHANGED
|
@@ -28,6 +28,7 @@ require 'anystyle/feature/terminal'
|
|
|
28
28
|
require 'anystyle/feature/words'
|
|
29
29
|
|
|
30
30
|
require 'anystyle/normalizer'
|
|
31
|
+
require 'anystyle/normalizer/arxiv'
|
|
31
32
|
require 'anystyle/normalizer/brackets'
|
|
32
33
|
require 'anystyle/normalizer/container'
|
|
33
34
|
require 'anystyle/normalizer/date'
|
data/lib/anystyle/document.rb
CHANGED
|
@@ -27,8 +27,8 @@ module AnyStyle
|
|
|
27
27
|
|
|
28
28
|
case format.downcase
|
|
29
29
|
when '.pdf'
|
|
30
|
-
meta = pdf_meta path if opts[:parse_meta]
|
|
31
|
-
info = pdf_info path if opts[:parse_info]
|
|
30
|
+
meta = pdf_meta path, **opts if opts[:parse_meta]
|
|
31
|
+
info = pdf_info path, **opts if opts[:parse_info]
|
|
32
32
|
input = pdf_to_text path, **opts
|
|
33
33
|
when '.ttx'
|
|
34
34
|
tagged = true
|
|
@@ -9,11 +9,15 @@ module AnyStyle
|
|
|
9
9
|
case alpha
|
|
10
10
|
when /^ed(s|itors?|ited?|iteurs?)?$/i,
|
|
11
11
|
/^(hg|hrsg|herausgeber)$/i,
|
|
12
|
-
/^(compilador)$/i
|
|
12
|
+
/^(compilador)$/i,
|
|
13
|
+
/編/
|
|
13
14
|
:editor
|
|
15
|
+
when /著|撰/,
|
|
16
|
+
:author
|
|
14
17
|
when /^trans(l(ated|ators?|ation))?$/i,
|
|
15
18
|
/^übers(etz(t|ung))?$/i,
|
|
16
|
-
/^trad(uction|ucteurs?|uit)?$/i
|
|
19
|
+
/^trad(uction|ucteurs?|uit)?$/i,
|
|
20
|
+
/譯/
|
|
17
21
|
:translator
|
|
18
22
|
when /^(dissertation|thesis)$/i
|
|
19
23
|
:thesis
|
|
@@ -21,7 +25,7 @@ module AnyStyle
|
|
|
21
25
|
:proceedings
|
|
22
26
|
when /^(Journal|Zeitschrift|Quarterly|Magazine?|Times|Rev(iew|vue)?|Bulletin|News|Week|Gazett[ea])/
|
|
23
27
|
:journal
|
|
24
|
-
when /^in$/i
|
|
28
|
+
when /^in$/i, /收入/
|
|
25
29
|
:in
|
|
26
30
|
when /^([AaUu]nd|y|e)$/
|
|
27
31
|
:and
|
|
@@ -29,21 +33,33 @@ module AnyStyle
|
|
|
29
33
|
:etal
|
|
30
34
|
when /^(pp?|pages?|S(eiten?)?|ff?)$/
|
|
31
35
|
:page
|
|
32
|
-
when /^(vol(ume)?s?|iss(ue)?|n[or]?|number|fasc(icle|icule)?)$/i
|
|
36
|
+
when /^(vol(ume)?s?|iss(ue)?|n[or]?|number|fasc(icle|icule)?|suppl(ement)?)$/i
|
|
33
37
|
:volume
|
|
34
38
|
when /^(ser(ies?)?|reihe|[ck]oll(e[ck]tion))$/i
|
|
35
39
|
:series
|
|
40
|
+
when /^patent$/i
|
|
41
|
+
:patent
|
|
42
|
+
when /^report$/i
|
|
43
|
+
:report
|
|
36
44
|
when /^(edn|edition|expanded|rev(ised)?|p?reprint(ed)?|illustrated)$/i,
|
|
45
|
+
/^editio|aucta$/i
|
|
37
46
|
/^(aufl(age)?|\p{Alpha}*ausg(abe)?)$/i
|
|
38
47
|
:edition
|
|
39
48
|
when /^(nd|date|spring|s[uo]mmer|autumn|fall|winter|frühling|herbst)$/i,
|
|
40
49
|
/^(jan(uary?)?|feb(ruary?)?|mar(ch|z)?|apr(il)?|ma[yi]|jun[ei]?)$/,
|
|
41
|
-
/^(jul[yi]?|aug(ust)?|sep(tember)?|o[ck]t(ober)?|nov(ember)?|de[cz](ember)?)$/i
|
|
50
|
+
/^(jul[yi]?|aug(ust)?|sep(tember)?|o[ck]t(ober)?|nov(ember)?|de[cz](ember)?)$/i,
|
|
51
|
+
/年/
|
|
42
52
|
:date
|
|
43
|
-
when /^(
|
|
53
|
+
when /^(doi|url)/i
|
|
44
54
|
:locator
|
|
45
|
-
when /^(
|
|
55
|
+
when /^(pmid|pmcid)/i
|
|
56
|
+
:pubmed
|
|
57
|
+
when /^(arxiv)/i
|
|
58
|
+
:arxiv
|
|
59
|
+
when /^(retrieved|retirado|accessed)$/i
|
|
46
60
|
:accessed
|
|
61
|
+
when /^[ILXVMCD]{2,}$/
|
|
62
|
+
:roman
|
|
47
63
|
else
|
|
48
64
|
:none
|
|
49
65
|
end
|
|
@@ -32,7 +32,7 @@ module AnyStyle
|
|
|
32
32
|
:list
|
|
33
33
|
when /^(\p{Lu}\.?)\s*(\d+\.)+\s+\p{L}+/
|
|
34
34
|
:title
|
|
35
|
-
when /^(\w+\s)?(tab(le|elle|\.)|fig(ure|\.))/i
|
|
35
|
+
when /^(\w+\s)?(tab(le|elle|\.)|fig(ure|\.)|equation|graph|abb(ildung)?)/i
|
|
36
36
|
:cap
|
|
37
37
|
when /^\p{Pd}?\d+\p{Pd}?$/, /^[ivx]+$/i
|
|
38
38
|
:num
|
|
@@ -3,13 +3,13 @@ module AnyStyle
|
|
|
3
3
|
class Quotes < Feature
|
|
4
4
|
def observe(token, **opts)
|
|
5
5
|
case token
|
|
6
|
-
when /^[^"'
|
|
6
|
+
when /^[^"'”„’‚´«「『〈《‘“`»」』〉》]+$/
|
|
7
7
|
:none
|
|
8
|
-
when /^["'
|
|
8
|
+
when /^["'”„’‚´«「『〈《‘“`»].*["'”„’‚´«‘“`»」』〉》][,;:\p{Pd}!\?\.]?$/
|
|
9
9
|
:'quote-unquote'
|
|
10
|
-
when /^["'
|
|
10
|
+
when /^["'”„’‚´«「『‘〈《“`»]/
|
|
11
11
|
:quote
|
|
12
|
-
when /["'
|
|
12
|
+
when /["'”„’‚´«‘“`»」』〉》][,;:\p{Pd}!\?\.]?$/
|
|
13
13
|
:unquote
|
|
14
14
|
else
|
|
15
15
|
:other
|
data/lib/anystyle/finder.rb
CHANGED
|
@@ -8,7 +8,10 @@ module AnyStyle
|
|
|
8
8
|
compact: true,
|
|
9
9
|
threads: 4,
|
|
10
10
|
format: :references,
|
|
11
|
-
training_data: Dir[File.join(RES, 'finder', '*.ttx')].map(&:untaint)
|
|
11
|
+
training_data: Dir[File.join(RES, 'finder', '*.ttx')].map(&:untaint),
|
|
12
|
+
layout: true,
|
|
13
|
+
pdftotext: 'pdftotext',
|
|
14
|
+
pdfinfo: 'pdfinfo'
|
|
12
15
|
}
|
|
13
16
|
|
|
14
17
|
def initialize(options = {})
|
|
@@ -71,12 +74,18 @@ module AnyStyle
|
|
|
71
74
|
})
|
|
72
75
|
end
|
|
73
76
|
|
|
74
|
-
def prepare(input,
|
|
77
|
+
def prepare(input,
|
|
78
|
+
layout: options[:layout],
|
|
79
|
+
crop: false,
|
|
80
|
+
pdftotext: options[:pdftotext],
|
|
81
|
+
pdfinfo: options[:pdfinfo],
|
|
82
|
+
**opts)
|
|
83
|
+
doc_opts = { layout: layout, crop: crop, pdftotext: pdftotext, pdfinfo: pdfinfo, **opts }
|
|
75
84
|
case input
|
|
76
85
|
when String
|
|
77
|
-
super(Document.open(input,
|
|
86
|
+
super(Document.open(input, **doc_opts), **opts)
|
|
78
87
|
when Array
|
|
79
|
-
super(Wapiti::Dataset.new(input.map { |f| Document.open(f, **
|
|
88
|
+
super(Wapiti::Dataset.new(input.map { |f| Document.open(f, **doc_opts) }), **opts)
|
|
80
89
|
else
|
|
81
90
|
super(input, **opts)
|
|
82
91
|
end
|
data/lib/anystyle/format/csl.rb
CHANGED
|
@@ -3,6 +3,7 @@ module AnyStyle
|
|
|
3
3
|
module CSL
|
|
4
4
|
def format_csl(dataset, **opts)
|
|
5
5
|
format_hash(dataset).map do |hash|
|
|
6
|
+
dates_to_citeproc(hash, **opts) if hash.key?(:date)
|
|
6
7
|
flatten_values hash, skip: Normalizer::Names.keys
|
|
7
8
|
|
|
8
9
|
rename_value hash, :pages, :page
|
|
@@ -23,6 +24,22 @@ module AnyStyle
|
|
|
23
24
|
end
|
|
24
25
|
|
|
25
26
|
alias_method :format_citeproc, :format_csl
|
|
27
|
+
|
|
28
|
+
def dates_to_citeproc(hash, date_format: 'edtf', **opts)
|
|
29
|
+
date, = *hash.delete(:date)
|
|
30
|
+
|
|
31
|
+
case date_format.to_s
|
|
32
|
+
when 'citeproc'
|
|
33
|
+
hash[:issued] = begin
|
|
34
|
+
require 'citeproc'
|
|
35
|
+
::CiteProc::Date.parse!(date.tr('X~', 'u?')).to_citeproc.symbolize_keys
|
|
36
|
+
rescue
|
|
37
|
+
date
|
|
38
|
+
end
|
|
39
|
+
else
|
|
40
|
+
hash[:issued] = date
|
|
41
|
+
end
|
|
42
|
+
end
|
|
26
43
|
end
|
|
27
44
|
end
|
|
28
45
|
end
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
module AnyStyle
|
|
2
|
+
class Normalizer
|
|
3
|
+
class ArXiv < Normalizer
|
|
4
|
+
@keys = [:note]
|
|
5
|
+
|
|
6
|
+
def normalize(item, **opts)
|
|
7
|
+
each_value(item) do |_, value|
|
|
8
|
+
if (value =~ /arxiv:?\s*(\d{4}\.\d+(?:v\d+)?|\w+(?:.\w+)?\/\d+)/i)
|
|
9
|
+
append item, :arxiv, $1
|
|
10
|
+
end
|
|
11
|
+
end
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
end
|
|
@@ -5,7 +5,11 @@ module AnyStyle
|
|
|
5
5
|
|
|
6
6
|
def normalize(item, **opts)
|
|
7
7
|
map_values(item) do |_, value|
|
|
8
|
-
value
|
|
8
|
+
value
|
|
9
|
+
.sub(/^[Ii]n(?::|\s+the)?\s+(\p{^Ll})/, '\1')
|
|
10
|
+
.sub(/^of\s+/, '')
|
|
11
|
+
.sub(/^收入/, '')
|
|
12
|
+
.sub(/^(\w+ )?presented at (the )?/i, '')
|
|
9
13
|
end
|
|
10
14
|
end
|
|
11
15
|
end
|
|
@@ -3,7 +3,7 @@ module AnyStyle
|
|
|
3
3
|
|
|
4
4
|
class Normalizer
|
|
5
5
|
class Locator < Normalizer
|
|
6
|
-
@keys = [:isbn, :url]
|
|
6
|
+
@keys = [:isbn, :url, :doi]
|
|
7
7
|
|
|
8
8
|
def normalize(item, **opts)
|
|
9
9
|
map_values(item) do |key, value|
|
|
@@ -11,12 +11,20 @@ module AnyStyle
|
|
|
11
11
|
when :isbn
|
|
12
12
|
value[/[\d-]+/]
|
|
13
13
|
when :url
|
|
14
|
+
doi = doi_extract(value)
|
|
15
|
+
append item, :doi, doi unless doi.nil?
|
|
14
16
|
URI.extract(value)
|
|
17
|
+
when :doi
|
|
18
|
+
doi_extract(value) || value
|
|
15
19
|
else
|
|
16
20
|
value
|
|
17
21
|
end
|
|
18
22
|
end
|
|
19
23
|
end
|
|
20
24
|
end
|
|
25
|
+
|
|
26
|
+
def doi_extract(value)
|
|
27
|
+
value[/10\.(\d{4,9}\/[-._;()\/:A-Z0-9]+|1002\/\S+)/i]
|
|
28
|
+
end
|
|
21
29
|
end
|
|
22
30
|
end
|
|
@@ -14,8 +14,10 @@ module AnyStyle
|
|
|
14
14
|
|
|
15
15
|
def normalize(item, **opts)
|
|
16
16
|
each_value(item) do |_, value|
|
|
17
|
-
value.gsub!(
|
|
18
|
-
value.gsub!(
|
|
17
|
+
value.gsub!(/\s*[\)\]\.,:;\p{Pd}\p{Z}\p{C}。、》〉]+$/, '')
|
|
18
|
+
value.gsub!(/[,:;》〉]+$/, '')
|
|
19
|
+
value.gsub!(/^[\(\[《〈]/, '')
|
|
20
|
+
value.gsub!(/<\/?(italic|bold)>/, '')
|
|
19
21
|
end
|
|
20
22
|
end
|
|
21
23
|
end
|
data/lib/anystyle/parser.rb
CHANGED
|
@@ -100,7 +100,7 @@ module AnyStyle
|
|
|
100
100
|
compact: true,
|
|
101
101
|
threads: 4,
|
|
102
102
|
separator: /(?:\r?\n)+/,
|
|
103
|
-
delimiter: /\s
|
|
103
|
+
delimiter: /\s+|([\uFF01-\uFF64]|。|、)/,
|
|
104
104
|
format: :hash,
|
|
105
105
|
training_data: File.join(RES, 'parser', 'core.xml')
|
|
106
106
|
}
|
|
@@ -139,6 +139,7 @@ module AnyStyle
|
|
|
139
139
|
Normalizer::Locator.new,
|
|
140
140
|
Normalizer::Publisher.new,
|
|
141
141
|
Normalizer::PubMed.new,
|
|
142
|
+
Normalizer::ArXiv.new,
|
|
142
143
|
Normalizer::Names.new,
|
|
143
144
|
Normalizer::Locale.new,
|
|
144
145
|
Normalizer::Type.new
|
data/lib/anystyle/refs.rb
CHANGED
|
@@ -90,7 +90,7 @@ module AnyStyle
|
|
|
90
90
|
indent_score(indent),
|
|
91
91
|
delta_score(delta),
|
|
92
92
|
years_score(a, b),
|
|
93
|
-
terminal_score(a),
|
|
93
|
+
terminal_score(a, b),
|
|
94
94
|
initial_score(a, b),
|
|
95
95
|
length_score(a, b),
|
|
96
96
|
pages_score(a, b)
|
|
@@ -100,7 +100,7 @@ module AnyStyle
|
|
|
100
100
|
|
|
101
101
|
def indent_score(indent)
|
|
102
102
|
case
|
|
103
|
-
when indent > 0 then 1
|
|
103
|
+
when indent > 0 then 1.25
|
|
104
104
|
when indent < 0 then -1
|
|
105
105
|
else
|
|
106
106
|
0
|
|
@@ -119,10 +119,19 @@ module AnyStyle
|
|
|
119
119
|
|
|
120
120
|
def years_score(a, b)
|
|
121
121
|
if match_year?(a)
|
|
122
|
-
if
|
|
123
|
-
|
|
122
|
+
if match_year?(b)
|
|
123
|
+
case
|
|
124
|
+
when b.length < 18
|
|
125
|
+
1
|
|
126
|
+
when b.length < 25
|
|
127
|
+
0.5
|
|
128
|
+
when b.length > 60
|
|
129
|
+
-0.75
|
|
130
|
+
else
|
|
131
|
+
0
|
|
132
|
+
end
|
|
124
133
|
else
|
|
125
|
-
if a.match(/[\d,] (1[4-9]|2[01])\d\d[a-z]?\.$/)
|
|
134
|
+
if a.match(/[\d,] \(?(1[4-9]|2[01])\d\d[a-z]?\)?\.$/)
|
|
126
135
|
-0.5
|
|
127
136
|
else
|
|
128
137
|
1
|
|
@@ -138,10 +147,10 @@ module AnyStyle
|
|
|
138
147
|
end
|
|
139
148
|
|
|
140
149
|
def pages_score(a, b)
|
|
141
|
-
if match_pages?(a)
|
|
150
|
+
if match_pages?(a, true)
|
|
142
151
|
-0.25
|
|
143
152
|
else
|
|
144
|
-
if match_pages?(b)
|
|
153
|
+
if match_pages?(b, false)
|
|
145
154
|
1
|
|
146
155
|
else
|
|
147
156
|
0
|
|
@@ -149,23 +158,25 @@ module AnyStyle
|
|
|
149
158
|
end
|
|
150
159
|
end
|
|
151
160
|
|
|
152
|
-
def match_pages?(string)
|
|
161
|
+
def match_pages?(string, not_years = true)
|
|
153
162
|
m = string.match(/(\d+)\p{Pd}(\d+)|\bpp?\.|\d+\(\d+\)/)
|
|
154
163
|
return false if m.nil?
|
|
155
|
-
return false if m[1] && match_year?(m[1]) && match_year?(m[2])
|
|
164
|
+
return false if not_years && m[1] && match_year?(m[1]) && match_year?(m[2])
|
|
156
165
|
return true
|
|
157
166
|
end
|
|
158
167
|
|
|
159
|
-
def terminal_score(
|
|
160
|
-
case
|
|
161
|
-
when /https?:\/\/\w+/i
|
|
162
|
-
-
|
|
163
|
-
when /[,;:&\p{Pd}]
|
|
168
|
+
def terminal_score(a, b)
|
|
169
|
+
case
|
|
170
|
+
when a.match(/https?:\/\/\w+/i)
|
|
171
|
+
-0.25
|
|
172
|
+
when a.match(/[,;:&\p{Pd}]$/), a.match(/\s(et al|pp|pg)\.$/)
|
|
164
173
|
2
|
|
165
|
-
when /\((1[4-9]|2[01])\d\d\)\.?$/
|
|
174
|
+
when a.match(/\((1[4-9]|2[01])\d\d\)\.?$/)
|
|
166
175
|
0
|
|
167
|
-
when /(\p{^Lu}\.|\])$/
|
|
176
|
+
when a.match(/(\p{^Lu}\.|\])$/)
|
|
168
177
|
-1
|
|
178
|
+
when a.match(/\d$/) && b.match(/^\p{Lu}/)
|
|
179
|
+
-0.25
|
|
169
180
|
else
|
|
170
181
|
0
|
|
171
182
|
end
|
|
@@ -177,13 +188,15 @@ module AnyStyle
|
|
|
177
188
|
1.5
|
|
178
189
|
when a.match(/\p{L}$/) && b.match(/^\p{L}/)
|
|
179
190
|
1
|
|
180
|
-
when b.match(/^["'”„’‚´«「『‘“`»]/)
|
|
191
|
+
when b.match(/^["'”„’‚´«「『‘“`»]/)
|
|
181
192
|
1
|
|
193
|
+
when b.match(/^(url|doi|isbn|vol)\b/i)
|
|
194
|
+
1.5
|
|
182
195
|
when b.match(/^([\p{Pd}_*][\p{Pd}_* ]+|\p{Co})/)
|
|
183
196
|
-1.5
|
|
184
197
|
when b.match(/^\((1[4-9]|2[01])\d\d\)/) && !a.match(/(\p{Lu}|al|others)\.$/)
|
|
185
198
|
-1
|
|
186
|
-
when b.match(/^\p{Lu}\p{Ll}
|
|
199
|
+
when b.match(/^\p{Lu}[\p{Ll}-]+,?\s\p{Lu}/) && !a.match(/\p{L}$/)
|
|
187
200
|
-0.5
|
|
188
201
|
when match_list?(b)
|
|
189
202
|
if match_list?(a)
|
|
@@ -191,13 +204,15 @@ module AnyStyle
|
|
|
191
204
|
else
|
|
192
205
|
-0.75
|
|
193
206
|
end
|
|
207
|
+
when b.match(/^\p{L}+:/), b.match(/^\p{L}+ \d/)
|
|
208
|
+
0.5
|
|
194
209
|
else
|
|
195
210
|
0
|
|
196
211
|
end
|
|
197
212
|
end
|
|
198
213
|
|
|
199
214
|
def match_list?(string)
|
|
200
|
-
string.match(/^(\d{1,3}\.\s
|
|
215
|
+
string.match(/^(\d{1,3}(\.\s+|\s{2,})\p{L}|\[\p{Alnum}+\])/)
|
|
201
216
|
end
|
|
202
217
|
|
|
203
218
|
def length_score(a, b)
|
|
@@ -205,8 +220,14 @@ module AnyStyle
|
|
|
205
220
|
when b.length < a.length
|
|
206
221
|
case
|
|
207
222
|
when b.length < 10
|
|
223
|
+
2.5
|
|
224
|
+
when b.length < 15
|
|
208
225
|
2
|
|
226
|
+
when b.length < 20
|
|
227
|
+
1.75
|
|
209
228
|
when b.length < 25
|
|
229
|
+
1.5
|
|
230
|
+
when b.length < 30
|
|
210
231
|
1
|
|
211
232
|
when b.length < 50
|
|
212
233
|
0.75
|
|
@@ -221,7 +242,7 @@ module AnyStyle
|
|
|
221
242
|
end
|
|
222
243
|
|
|
223
244
|
def join(a, b)
|
|
224
|
-
if a
|
|
245
|
+
if a =~ /\p{Pd}$/
|
|
225
246
|
if a =~ /\p{Ll}-$/ && b =~ /^\p{Ll}/
|
|
226
247
|
"#{a[0...-1]}#{b}"
|
|
227
248
|
else
|