proiel 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/proiel/tokenization.rb +4 -2
- data/lib/proiel/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 66a5e6fc550b5d017d0b49396ec931624cc9cc12
|
|
4
|
+
data.tar.gz: a89f0936a25eb4092728ac527f8816095f721818
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c956426f6bd2b6e90b9254a72f256d14937c99d0ca5183897b414fd17c6a88803082c0c88690b1ad695347b29cc1bfd86de2a933cfcbff453b4fb93ad4b9e623
|
|
7
|
+
data.tar.gz: 756f1beb29e9ec3174e9b65aa7a52428a6fa87f1d9db9bec18e112afb26ff372c05d333e96e2c60b889d4a8cd72540c315ecd0719bae2ba3e2a5d6e8bbe8b4b8
|
data/lib/proiel/tokenization.rb
CHANGED
|
@@ -59,6 +59,8 @@ module PROIEL
|
|
|
59
59
|
form and form.length > 1
|
|
60
60
|
end
|
|
61
61
|
|
|
62
|
+
WORD_PATTERN = /([^[\u{E000}-\u{F8FF}][[:word:]]]+)/
|
|
63
|
+
|
|
62
64
|
# Splits a token form using the tokenization patterns that apply for a
|
|
63
65
|
# the specified language. Tokenization patterns must already have been
|
|
64
66
|
# loaded.
|
|
@@ -73,9 +75,9 @@ module PROIEL
|
|
|
73
75
|
raise ArgumentError, 'invalid language tag' unless language_tag.is_a?(String)
|
|
74
76
|
raise ArgumentError, 'invalid form' unless form.is_a?(String)
|
|
75
77
|
|
|
76
|
-
if form[
|
|
78
|
+
if form[WORD_PATTERN]
|
|
77
79
|
# Split on any non-word character like a space or punctuation
|
|
78
|
-
form.split(
|
|
80
|
+
form.split(WORD_PATTERN)
|
|
79
81
|
elsif @@regexes.key?(language_tag) and form[@@regexes[language_tag]]
|
|
80
82
|
# Apply language-specific pattern
|
|
81
83
|
form.match(@@regexes[language_tag]).captures
|
data/lib/proiel/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: proiel
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.1.0
|
|
4
|
+
version: 1.1.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Marius L. Jøhndal
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2016-06-
|
|
11
|
+
date: 2016-06-12 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: json
|