pollex 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +3 -0
- data/lib/pollex/source.rb +21 -12
- data/lib/pollex/translator.rb +1 -1
- data/lib/pollex/version.rb +1 -1
- metadata +3 -3
data/CHANGELOG
CHANGED
data/lib/pollex/source.rb
CHANGED
@@ -42,9 +42,6 @@ module Pollex
|
|
42
42
|
|
43
43
|
# Returns grammatical information for this source, used for
|
44
44
|
# intelligently parsing the descriptions of entries from this source
|
45
|
-
# @note Information is currently entered for all sources on
|
46
|
-
# http://pollex.org.nz/source/ up to (and including)
|
47
|
-
# Bse
|
48
45
|
# @return [Hash] grammatical information pertaining to the descriptions
|
49
46
|
# of this source's entries
|
50
47
|
# @see Entry#terms
|
@@ -58,48 +55,60 @@ module Pollex
|
|
58
55
|
|
59
56
|
# now bring in source-specific information
|
60
57
|
|
61
|
-
if ['Cnt', 'Bxn'].include? @code
|
58
|
+
if ['Cnt', 'Bxn', 'Egt', 'Fts'].include? @code
|
62
59
|
# Spanish-language sources
|
63
60
|
language = 'es'
|
64
|
-
elsif ['Aca', 'Bgn', 'Btn', 'Hmn', 'Rch'].include? @code
|
61
|
+
elsif ['Aca', 'Bgn', 'Btn', 'Hmn', 'Rch', 'Dln', 'Gzl', 'Jnu', 'Jsn', 'Rve', 'Lvs', 'Lch', 'Lmt', 'Myr', 'Mfr', 'Rdl', 'Sgs'].include? @code
|
65
62
|
# French-language sources
|
66
63
|
language = 'fr'
|
64
|
+
elsif ['Ths'].include? @code
|
65
|
+
# German-language sources
|
66
|
+
language = 'de'
|
67
67
|
end
|
68
68
|
|
69
|
-
if ['Aca', 'Bxn'].include? @code
|
69
|
+
if ['Aca', 'Bxn', 'Jsn', 'Mtu', 'Grn'].include? @code
|
70
70
|
# split by comma, semicolon, period
|
71
71
|
dividers = /(,|;|\. )/
|
72
|
-
elsif ['Atn', 'Bwh', 'Hmn'].include? @code
|
72
|
+
elsif ['Atn', 'Bwh', 'Hmn', 'Crk', 'Hdy', 'Smt', 'Rkj'].include? @code
|
73
73
|
# don't split at all
|
74
74
|
dividers = '\n' # dividers = nil doesn't work
|
75
|
-
elsif ['Bgn', 'Bst', 'Brn'].include? @code
|
75
|
+
elsif ['Bgn', 'Bst', 'Brn', 'Gms', 'Tmo'].include? @code
|
76
76
|
# split by period
|
77
77
|
dividers = '.'
|
78
78
|
elsif ['Bkr', 'Bgs'].include? @code
|
79
79
|
# split by comma, period
|
80
80
|
dividers = /(,|\. )/
|
81
|
-
elsif ['Bge', 'Bck'].include? @code
|
81
|
+
elsif ['Bge', 'Bck', 'Cbl', 'Chn', 'Cdn', 'Dvs', 'Dnr', 'Dln', 'Dye', 'Ebt', 'Egt', 'Fbg', 'Fth', 'Fox', 'Fts', 'Hzd', 'Hry', 'Hvn', 'Hnh', 'Fny', 'Mta', 'Myr', 'Mtx', 'Mnr', 'Mbg', 'Kvt', 'Ply', 'Ebt', 'Mka', 'Sby', 'Sve', 'Sta', 'Sma', 'Sks', 'Tbs', 'Tgr', 'Whe', 'Whr', 'Rmn', 'Wms', 'Ykr'].include? @code
|
82
82
|
# split by semicolon
|
83
83
|
dividers = ';'
|
84
|
+
elsif ['Drd', 'Hbn', 'Mkn', 'Rdl', 'Bke'].include? @code
|
85
|
+
# split by semicolon, period
|
86
|
+
dividers = /(;|\. )/
|
84
87
|
end
|
85
88
|
|
86
|
-
if ['McP', 'Dsn'].include? @code
|
89
|
+
if ['McP', 'Dsn', 'Gzl', 'Sby', 'Sph'].include? @code
|
87
90
|
# Trim all (parenthetical expressions)
|
88
91
|
trim_expressions = /\(.*\)/
|
89
|
-
elsif ['Cnt', 'Aca', 'Bse', 'Hmn'].include? @code
|
92
|
+
elsif ['Cnt', 'Aca', 'Bse', 'Hmn', 'Cbl', 'Cpl', 'Crn', 'Chn', 'Chl', 'Cwd', 'Clk', 'Cek', 'Crk', 'Dvs', 'Dtn', 'Dnr', 'Dty', 'Fth', 'Fox', 'Fts', 'Gmd', 'McC', 'Hwd', 'Ivs', 'Lmt', 'Lvs', 'Lmt', 'Lbr', 'Mar', 'Mta', 'Myr', 'McE', 'Mnr', 'Mfr', 'Mtu', 'Gty', 'Ply', 'Rby', 'Mka', 'Clk', 'Sve', 'Shd', 'Sma', 'Stn', 'Sks', 'Tgr', 'Whe', 'Mke', 'Whr'].include? @code
|
90
93
|
# Trim parenthetical expressions that are <= 4 chars or contain numbers
|
91
94
|
trim_expressions = /\((.{0,4}|.*[0-9].*)\)/
|
92
95
|
elsif ['Stz', 'Bck'].include? @code
|
93
96
|
# Trim parenthetical expressions that contain numbers
|
94
97
|
trim_expressions = /\(.*[0-9].*\)/
|
98
|
+
elsif ['Kch', 'Ray'].include? @code
|
99
|
+
# Trim all [bracketed expressions]
|
100
|
+
trim_expressions = /\[.*\]/
|
95
101
|
elsif ['Rsr'].include? @code
|
96
102
|
# Trim all "expressions in quotes"
|
97
103
|
trim_expressions = /".*"/
|
98
104
|
end
|
99
105
|
|
100
|
-
if ['Btl', 'Bck'].include? @code
|
106
|
+
if ['Btl', 'Bck', 'Chl', 'McC', 'Hpr', 'Mbg', 'Wte'].include? @code
|
101
107
|
# Trim everything after a period
|
102
108
|
trim_after = '.'
|
109
|
+
elsif ['Shd'].include? @code
|
110
|
+
# Trim everything after an equals sign
|
111
|
+
trim_after = '='
|
103
112
|
end
|
104
113
|
|
105
114
|
{
|
data/lib/pollex/translator.rb
CHANGED
@@ -28,7 +28,7 @@ module Pollex
|
|
28
28
|
@cache[key]
|
29
29
|
else
|
30
30
|
# make a request to MyMemory
|
31
|
-
puts "Translating '#{phrase}' ..."
|
31
|
+
puts "Translating '#{phrase}' from (#{source_lang_code}) ..."
|
32
32
|
url = "http://mymemory.translated.net/api/get?q=#{URI::encode(phrase)}&langpair=#{source_lang_code}%7Cen"
|
33
33
|
results_json = open(url).read
|
34
34
|
result = JSON.parse(results_json)['responseData']['translatedText']
|
data/lib/pollex/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pollex
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-03-
|
12
|
+
date: 2013-03-28 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -115,7 +115,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
115
115
|
version: '0'
|
116
116
|
requirements: []
|
117
117
|
rubyforge_project:
|
118
|
-
rubygems_version: 1.8.
|
118
|
+
rubygems_version: 1.8.25
|
119
119
|
signing_key:
|
120
120
|
specification_version: 3
|
121
121
|
summary: Ruby wrapper for scraping pollex (the Polynesian Lexicon Project)
|