pollex 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +3 -0
- data/lib/pollex/source.rb +21 -12
- data/lib/pollex/translator.rb +1 -1
- data/lib/pollex/version.rb +1 -1
- metadata +3 -3
data/CHANGELOG
CHANGED
data/lib/pollex/source.rb
CHANGED
@@ -42,9 +42,6 @@ module Pollex
|
|
42
42
|
|
43
43
|
# Returns grammatical information for this source, used for
|
44
44
|
# intelligently parsing the descriptions of entries from this source
|
45
|
-
# @note Information is currently entered for all sources on
|
46
|
-
# http://pollex.org.nz/source/ up to (and including)
|
47
|
-
# Bse
|
48
45
|
# @return [Hash] grammatical information pertaining to the descriptions
|
49
46
|
# of this source's entries
|
50
47
|
# @see Entry#terms
|
@@ -58,48 +55,60 @@ module Pollex
|
|
58
55
|
|
59
56
|
# now bring in source-specific information
|
60
57
|
|
61
|
-
if ['Cnt', 'Bxn'].include? @code
|
58
|
+
if ['Cnt', 'Bxn', 'Egt', 'Fts'].include? @code
|
62
59
|
# Spanish-language sources
|
63
60
|
language = 'es'
|
64
|
-
elsif ['Aca', 'Bgn', 'Btn', 'Hmn', 'Rch'].include? @code
|
61
|
+
elsif ['Aca', 'Bgn', 'Btn', 'Hmn', 'Rch', 'Dln', 'Gzl', 'Jnu', 'Jsn', 'Rve', 'Lvs', 'Lch', 'Lmt', 'Myr', 'Mfr', 'Rdl', 'Sgs'].include? @code
|
65
62
|
# French-language sources
|
66
63
|
language = 'fr'
|
64
|
+
elsif ['Ths'].include? @code
|
65
|
+
# German-language sources
|
66
|
+
language = 'de'
|
67
67
|
end
|
68
68
|
|
69
|
-
if ['Aca', 'Bxn'].include? @code
|
69
|
+
if ['Aca', 'Bxn', 'Jsn', 'Mtu', 'Grn'].include? @code
|
70
70
|
# split by comma, semicolon, period
|
71
71
|
dividers = /(,|;|\. )/
|
72
|
-
elsif ['Atn', 'Bwh', 'Hmn'].include? @code
|
72
|
+
elsif ['Atn', 'Bwh', 'Hmn', 'Crk', 'Hdy', 'Smt', 'Rkj'].include? @code
|
73
73
|
# don't split at all
|
74
74
|
dividers = '\n' # dividers = nil doesn't work
|
75
|
-
elsif ['Bgn', 'Bst', 'Brn'].include? @code
|
75
|
+
elsif ['Bgn', 'Bst', 'Brn', 'Gms', 'Tmo'].include? @code
|
76
76
|
# split by period
|
77
77
|
dividers = '.'
|
78
78
|
elsif ['Bkr', 'Bgs'].include? @code
|
79
79
|
# split by comma, period
|
80
80
|
dividers = /(,|\. )/
|
81
|
-
elsif ['Bge', 'Bck'].include? @code
|
81
|
+
elsif ['Bge', 'Bck', 'Cbl', 'Chn', 'Cdn', 'Dvs', 'Dnr', 'Dln', 'Dye', 'Ebt', 'Egt', 'Fbg', 'Fth', 'Fox', 'Fts', 'Hzd', 'Hry', 'Hvn', 'Hnh', 'Fny', 'Mta', 'Myr', 'Mtx', 'Mnr', 'Mbg', 'Kvt', 'Ply', 'Ebt', 'Mka', 'Sby', 'Sve', 'Sta', 'Sma', 'Sks', 'Tbs', 'Tgr', 'Whe', 'Whr', 'Rmn', 'Wms', 'Ykr'].include? @code
|
82
82
|
# split by semicolon
|
83
83
|
dividers = ';'
|
84
|
+
elsif ['Drd', 'Hbn', 'Mkn', 'Rdl', 'Bke'].include? @code
|
85
|
+
# split by semicolon, period
|
86
|
+
dividers = /(;|\. )/
|
84
87
|
end
|
85
88
|
|
86
|
-
if ['McP', 'Dsn'].include? @code
|
89
|
+
if ['McP', 'Dsn', 'Gzl', 'Sby', 'Sph'].include? @code
|
87
90
|
# Trim all (parenthetical expressions)
|
88
91
|
trim_expressions = /\(.*\)/
|
89
|
-
elsif ['Cnt', 'Aca', 'Bse', 'Hmn'].include? @code
|
92
|
+
elsif ['Cnt', 'Aca', 'Bse', 'Hmn', 'Cbl', 'Cpl', 'Crn', 'Chn', 'Chl', 'Cwd', 'Clk', 'Cek', 'Crk', 'Dvs', 'Dtn', 'Dnr', 'Dty', 'Fth', 'Fox', 'Fts', 'Gmd', 'McC', 'Hwd', 'Ivs', 'Lmt', 'Lvs', 'Lmt', 'Lbr', 'Mar', 'Mta', 'Myr', 'McE', 'Mnr', 'Mfr', 'Mtu', 'Gty', 'Ply', 'Rby', 'Mka', 'Clk', 'Sve', 'Shd', 'Sma', 'Stn', 'Sks', 'Tgr', 'Whe', 'Mke', 'Whr'].include? @code
|
90
93
|
# Trim parenthetical expressions that are <= 4 chars or contain numbers
|
91
94
|
trim_expressions = /\((.{0,4}|.*[0-9].*)\)/
|
92
95
|
elsif ['Stz', 'Bck'].include? @code
|
93
96
|
# Trim parenthetical expressions that contain numbers
|
94
97
|
trim_expressions = /\(.*[0-9].*\)/
|
98
|
+
elsif ['Kch', 'Ray'].include? @code
|
99
|
+
# Trim all [bracketed expressions]
|
100
|
+
trim_expressions = /\[.*\]/
|
95
101
|
elsif ['Rsr'].include? @code
|
96
102
|
# Trim all "expressions in quotes"
|
97
103
|
trim_expressions = /".*"/
|
98
104
|
end
|
99
105
|
|
100
|
-
if ['Btl', 'Bck'].include? @code
|
106
|
+
if ['Btl', 'Bck', 'Chl', 'McC', 'Hpr', 'Mbg', 'Wte'].include? @code
|
101
107
|
# Trim everything after a period
|
102
108
|
trim_after = '.'
|
109
|
+
elsif ['Shd'].include? @code
|
110
|
+
# Trim everything after an equals sign
|
111
|
+
trim_after = '='
|
103
112
|
end
|
104
113
|
|
105
114
|
{
|
data/lib/pollex/translator.rb
CHANGED
@@ -28,7 +28,7 @@ module Pollex
|
|
28
28
|
@cache[key]
|
29
29
|
else
|
30
30
|
# make a request to MyMemory
|
31
|
-
puts "Translating '#{phrase}' ..."
|
31
|
+
puts "Translating '#{phrase}' from (#{source_lang_code}) ..."
|
32
32
|
url = "http://mymemory.translated.net/api/get?q=#{URI::encode(phrase)}&langpair=#{source_lang_code}%7Cen"
|
33
33
|
results_json = open(url).read
|
34
34
|
result = JSON.parse(results_json)['responseData']['translatedText']
|
data/lib/pollex/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pollex
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-03-
|
12
|
+
date: 2013-03-28 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -115,7 +115,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
115
115
|
version: '0'
|
116
116
|
requirements: []
|
117
117
|
rubyforge_project:
|
118
|
-
rubygems_version: 1.8.
|
118
|
+
rubygems_version: 1.8.25
|
119
119
|
signing_key:
|
120
120
|
specification_version: 3
|
121
121
|
summary: Ruby wrapper for scraping pollex (the Polynesian Lexicon Project)
|