logaling-command 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGES +18 -0
- data/README.md +5 -5
- data/bin/loga +1 -2
- data/lib/logaling/command/application.rb +383 -0
- data/lib/logaling/command/version.rb +22 -0
- data/lib/logaling/command.rb +2 -357
- data/lib/logaling/config.rb +86 -0
- data/lib/logaling/external_glossaries/freebsd_jpman.rb +38 -0
- data/lib/logaling/external_glossaries/mozilla_japan.rb +48 -0
- data/lib/logaling/glossary.rb +27 -17
- data/lib/logaling/glossary_db.rb +102 -53
- data/lib/logaling/repository.rb +8 -4
- data/logaling-command.gemspec +3 -3
- data/spec/logaling/command_spec.rb +78 -91
- data/spec/logaling/glossary_spec.rb +13 -11
- data/spec/logaling/repository_spec.rb +67 -42
- data/spec/spec_helper.rb +13 -3
- metadata +24 -19
@@ -0,0 +1,48 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# This program is free software: you can redistribute it and/or modify
|
3
|
+
# it under the terms of the GNU General Public License as published by
|
4
|
+
# the Free Software Foundation, either version 3 of the License, or
|
5
|
+
# (at your option) any later version.
|
6
|
+
#
|
7
|
+
# This program is distributed in the hope that it will be useful,
|
8
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
9
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
10
|
+
# GNU General Public License for more details.
|
11
|
+
#
|
12
|
+
# You should have received a copy of the GNU General Public License
|
13
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
14
|
+
|
15
|
+
require 'open-uri'
|
16
|
+
require 'nokogiri'
|
17
|
+
|
18
|
+
module Logaling
|
19
|
+
class MozillaJapan < ExternalGlossary
|
20
|
+
description 'Mozilla Japan (http://www.mozilla-japan.org/jp/l10n/term/l10n.html)'
|
21
|
+
source_language 'en'
|
22
|
+
target_language 'ja'
|
23
|
+
output_format 'csv'
|
24
|
+
|
25
|
+
private
|
26
|
+
def convert_to_csv(csv)
|
27
|
+
url = 'http://www.mozilla-japan.org/jp/l10n/term/l10n.html'
|
28
|
+
doc = ::Nokogiri::HTML(open(url, "r"))
|
29
|
+
doc.encoding = "UTF-8"
|
30
|
+
doc.search(".obsolete").remove
|
31
|
+
doc.search(".relate").remove
|
32
|
+
doc.css("dl[@class='terminology en-ja']").each do |dl|
|
33
|
+
dl.children.each_slice(2) do |dt, dd|
|
34
|
+
dd.text.split("|").each do |ddt|
|
35
|
+
ddt = ddt.gsub(/\s/, '')
|
36
|
+
unless ddt.empty?
|
37
|
+
csv << [dust_to_tilda(dt.text), dust_to_tilda(ddt)]
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
def dust_to_tilda(txt)
|
45
|
+
txt.gsub("\xEF\xBF\xBD", "\xE3\x80\x9C")
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
data/lib/logaling/glossary.rb
CHANGED
@@ -15,7 +15,12 @@
|
|
15
15
|
# You should have received a copy of the GNU General Public License
|
16
16
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
17
|
|
18
|
-
|
18
|
+
begin
|
19
|
+
require 'psych'
|
20
|
+
rescue LoadError => e
|
21
|
+
raise LoadError unless e.message =~ /psych/
|
22
|
+
puts "please install psych first."
|
23
|
+
end
|
19
24
|
require "yaml"
|
20
25
|
require "csv"
|
21
26
|
require "fileutils"
|
@@ -55,32 +60,28 @@ module Logaling
|
|
55
60
|
end
|
56
61
|
glossary
|
57
62
|
end
|
58
|
-
|
59
|
-
def build_path(glossary, source_language, target_language)
|
60
|
-
fname = [glossary, source_language, target_language].join(".")
|
61
|
-
File.join(LOGALING_HOME, "projects", glossary, "glossary", "#{fname}.yml")
|
62
|
-
end
|
63
63
|
end
|
64
|
+
attr_reader :glossary, :source_language, :target_language
|
64
65
|
|
65
|
-
def initialize(glossary, source_language, target_language)
|
66
|
-
@
|
66
|
+
def initialize(glossary, source_language, target_language, logaling_home)
|
67
|
+
@logaling_home = logaling_home
|
67
68
|
@glossary = glossary
|
68
69
|
@source_language = source_language
|
69
70
|
@target_language = target_language
|
70
71
|
end
|
71
72
|
|
72
73
|
def add(source_term, target_term, note)
|
73
|
-
FileUtils.touch(
|
74
|
+
FileUtils.touch(source_path) unless File.exist?(source_path)
|
74
75
|
|
75
|
-
glossary = Glossary.load_glossary(
|
76
|
+
glossary = Glossary.load_glossary(source_path)
|
76
77
|
glossary << build_term(source_term, target_term, note)
|
77
78
|
dump_glossary(glossary)
|
78
79
|
end
|
79
80
|
|
80
81
|
def update(source_term, target_term, new_target_term, note)
|
81
|
-
raise GlossaryNotFound unless File.exist?(
|
82
|
+
raise GlossaryNotFound unless File.exist?(source_path)
|
82
83
|
|
83
|
-
glossary = Glossary.load_glossary(
|
84
|
+
glossary = Glossary.load_glossary(source_path)
|
84
85
|
|
85
86
|
target_index = find_term_index(glossary, source_term, target_term)
|
86
87
|
if target_index
|
@@ -92,9 +93,9 @@ module Logaling
|
|
92
93
|
end
|
93
94
|
|
94
95
|
def delete(source_term, target_term)
|
95
|
-
raise GlossaryNotFound unless File.exist?(
|
96
|
+
raise GlossaryNotFound unless File.exist?(source_path)
|
96
97
|
|
97
|
-
glossary = Glossary.load_glossary(
|
98
|
+
glossary = Glossary.load_glossary(source_path)
|
98
99
|
target_index = find_term_index(glossary, source_term, target_term)
|
99
100
|
unless target_index
|
100
101
|
raise TermError, "Can't found term '#{source_term} #{target_term}' in '#{@glossary}'" unless target_index
|
@@ -105,9 +106,9 @@ module Logaling
|
|
105
106
|
end
|
106
107
|
|
107
108
|
def delete_all(source_term, force=false)
|
108
|
-
raise GlossaryNotFound unless File.exist?(
|
109
|
+
raise GlossaryNotFound unless File.exist?(source_path)
|
109
110
|
|
110
|
-
glossary = Glossary.load_glossary(
|
111
|
+
glossary = Glossary.load_glossary(source_path)
|
111
112
|
delete_candidates = target_terms(glossary, source_term)
|
112
113
|
if delete_candidates.empty?
|
113
114
|
raise TermError, "Can't found term '#{source_term} in '#{@glossary}'"
|
@@ -123,6 +124,15 @@ module Logaling
|
|
123
124
|
end
|
124
125
|
end
|
125
126
|
|
127
|
+
def source_path
|
128
|
+
if @source_path
|
129
|
+
@source_path
|
130
|
+
else
|
131
|
+
fname = [@glossary, @source_language, @target_language].join(".")
|
132
|
+
@source_path = File.join(@logaling_home, "projects", @glossary, "glossary", "#{fname}.yml")
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
126
136
|
private
|
127
137
|
def build_term(source_term, target_term, note)
|
128
138
|
note ||= ''
|
@@ -152,7 +162,7 @@ module Logaling
|
|
152
162
|
end
|
153
163
|
|
154
164
|
def dump_glossary(glossary)
|
155
|
-
File.open(
|
165
|
+
File.open(source_path, "w") do |f|
|
156
166
|
f.puts(glossary.to_yaml)
|
157
167
|
end
|
158
168
|
end
|
data/lib/logaling/glossary_db.rb
CHANGED
@@ -81,18 +81,22 @@ module Logaling
|
|
81
81
|
end
|
82
82
|
end
|
83
83
|
|
84
|
-
def lookup(source_term,
|
84
|
+
def lookup(source_term, glossary_source=nil)
|
85
85
|
records_selected = Groonga["translations"].select do |record|
|
86
86
|
conditions = [record.source_term =~ source_term]
|
87
|
-
|
88
|
-
|
87
|
+
if glossary_source
|
88
|
+
conditions << (record.source_language =~ glossary_source.source_language) if glossary_source.source_language
|
89
|
+
conditions << (record.target_language =~ glossary_source.target_language) if glossary_source.target_language
|
90
|
+
end
|
89
91
|
conditions
|
90
92
|
end
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
93
|
+
if glossary_source
|
94
|
+
specified_glossary = records_selected.select do |record|
|
95
|
+
record.glossary == glossary_source.glossary
|
96
|
+
end
|
97
|
+
specified_glossary.each do |record|
|
98
|
+
record.key._score += 10
|
99
|
+
end
|
96
100
|
end
|
97
101
|
records = records_selected.sort([
|
98
102
|
{:key=>"_score", :order=>'descending'},
|
@@ -104,27 +108,55 @@ module Logaling
|
|
104
108
|
:html_escape => true,
|
105
109
|
:normalize => true}
|
106
110
|
snippet = records_selected.expression.snippet(["<snippet>", "</snippet>"], options)
|
111
|
+
struct_result(records, snippet)
|
112
|
+
ensure
|
113
|
+
snippet.close if snippet
|
114
|
+
records_selected.expression.close if records_selected
|
115
|
+
specified_glossary.expression.close if specified_glossary
|
116
|
+
end
|
107
117
|
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
118
|
+
|
119
|
+
def lookup_dictionary(search_word)
|
120
|
+
records_selected_source = Groonga["translations"].select do |record|
|
121
|
+
target = record.match_target do |match_record|
|
122
|
+
match_record.source_term * 2
|
123
|
+
end
|
124
|
+
target =~ search_word
|
125
|
+
end
|
126
|
+
completely_match = records_selected_source.select do |record|
|
127
|
+
record.source_term == search_word
|
128
|
+
end
|
129
|
+
completely_match.each do |record|
|
130
|
+
record.key._score += 10
|
119
131
|
end
|
132
|
+
|
133
|
+
records_selected_target = Groonga["translations"].select do |record|
|
134
|
+
record.target_term =~ search_word
|
135
|
+
end
|
136
|
+
|
137
|
+
records_selected = records_selected_target.union!(records_selected_source)
|
138
|
+
records = records_selected.sort([
|
139
|
+
{:key=>"_score", :order=>'descending'},
|
140
|
+
{:key=>"source_term", :order=>'ascending'},
|
141
|
+
{:key=>"target_term", :order=>'ascending'}])
|
142
|
+
|
143
|
+
options = {:width => 100,
|
144
|
+
:html_escape => true,
|
145
|
+
:normalize => true}
|
146
|
+
snippet = records_selected.expression.snippet(["<snippet>", "</snippet>"], options)
|
147
|
+
|
148
|
+
struct_result(records, snippet)
|
149
|
+
ensure
|
150
|
+
snippet.close if snippet
|
151
|
+
records_selected.expression.close if records_selected
|
120
152
|
end
|
121
153
|
|
122
|
-
def translation_list(
|
154
|
+
def translation_list(glossary_source)
|
123
155
|
records_raw = Groonga["translations"].select do |record|
|
124
156
|
[
|
125
|
-
record.glossary == glossary,
|
126
|
-
record.source_language == source_language,
|
127
|
-
record.target_language == target_language
|
157
|
+
record.glossary == glossary_source.glossary,
|
158
|
+
record.source_language == glossary_source.source_language,
|
159
|
+
record.target_language == glossary_source.target_language
|
128
160
|
]
|
129
161
|
end
|
130
162
|
|
@@ -132,16 +164,9 @@ module Logaling
|
|
132
164
|
{:key=>"source_term", :order=>'ascending'},
|
133
165
|
{:key=>"target_term", :order=>'ascending'}])
|
134
166
|
|
135
|
-
records
|
136
|
-
|
137
|
-
|
138
|
-
{:glossary_name => term.glossary.key,
|
139
|
-
:source_language => term.source_language,
|
140
|
-
:target_language => term.target_language,
|
141
|
-
:source_term => term.source_term,
|
142
|
-
:target_term => term.target_term,
|
143
|
-
:note => term.note || ''}
|
144
|
-
end
|
167
|
+
struct_result(records)
|
168
|
+
ensure
|
169
|
+
records_raw.expression.close
|
145
170
|
end
|
146
171
|
|
147
172
|
def get_bilingual_pair(source_term, target_term, glossary)
|
@@ -153,16 +178,9 @@ module Logaling
|
|
153
178
|
]
|
154
179
|
end
|
155
180
|
|
156
|
-
records
|
157
|
-
|
158
|
-
|
159
|
-
{:glossary_name => term.glossary,
|
160
|
-
:source_language => term.source_language,
|
161
|
-
:target_language => term.target_language,
|
162
|
-
:source_term => term.source_term,
|
163
|
-
:target_term => term.target_term,
|
164
|
-
:note => term.note || ''}
|
165
|
-
end
|
181
|
+
struct_result(records)
|
182
|
+
ensure
|
183
|
+
records.expression.close
|
166
184
|
end
|
167
185
|
|
168
186
|
def get_bilingual_pair_with_note(source_term, target_term, note, glossary)
|
@@ -175,16 +193,9 @@ module Logaling
|
|
175
193
|
]
|
176
194
|
end
|
177
195
|
|
178
|
-
records
|
179
|
-
|
180
|
-
|
181
|
-
{:glossary_name => term.glossary,
|
182
|
-
:source_language => term.source_language,
|
183
|
-
:target_language => term.target_language,
|
184
|
-
:source_term => term.source_term,
|
185
|
-
:target_term => term.target_term,
|
186
|
-
:note => term.note || ''}
|
187
|
-
end
|
196
|
+
struct_result(records)
|
197
|
+
ensure
|
198
|
+
records.expression.close
|
188
199
|
end
|
189
200
|
|
190
201
|
def glossary_source_exist?(glossary_source, indexed_at)
|
@@ -195,6 +206,8 @@ module Logaling
|
|
195
206
|
]
|
196
207
|
end
|
197
208
|
!glossary.size.zero?
|
209
|
+
ensure
|
210
|
+
glossary.expression.close
|
198
211
|
end
|
199
212
|
|
200
213
|
def get_all_glossary_source
|
@@ -218,6 +231,8 @@ module Logaling
|
|
218
231
|
records.each do |record|
|
219
232
|
record.key.delete
|
220
233
|
end
|
234
|
+
ensure
|
235
|
+
records.expression.close
|
221
236
|
end
|
222
237
|
|
223
238
|
def add_glossary_source(glossary_source, indexed_at)
|
@@ -232,6 +247,8 @@ module Logaling
|
|
232
247
|
records.each do |record|
|
233
248
|
record.key.delete
|
234
249
|
end
|
250
|
+
ensure
|
251
|
+
records.expression.close
|
235
252
|
end
|
236
253
|
|
237
254
|
def add_glossary(glossary_name)
|
@@ -246,6 +263,8 @@ module Logaling
|
|
246
263
|
records.each do |record|
|
247
264
|
record.key.delete
|
248
265
|
end
|
266
|
+
ensure
|
267
|
+
records.expression.close
|
249
268
|
end
|
250
269
|
|
251
270
|
def add_translation(glossary_name, glossary_source, source_language, target_language, source_term, target_term, note)
|
@@ -302,6 +321,7 @@ module Logaling
|
|
302
321
|
:key_normalize => true,
|
303
322
|
:default_tokenizer => "TokenBigram") do |table|
|
304
323
|
table.index("translations.source_term")
|
324
|
+
table.index("translations.target_term")
|
305
325
|
end
|
306
326
|
end
|
307
327
|
end
|
@@ -320,7 +340,24 @@ module Logaling
|
|
320
340
|
@database.nil? or @database.closed?
|
321
341
|
end
|
322
342
|
|
343
|
+
def struct_result(records, snippet=nil)
|
344
|
+
records.map do |record|
|
345
|
+
term = record.key
|
346
|
+
snipped_source_term = snippet ? snip_source_term(term, snippet) : []
|
347
|
+
snipped_target_term = snippet ? snip_target_term(term, snippet) : []
|
348
|
+
{:glossary_name => term.glossary.key,
|
349
|
+
:source_language => term.source_language,
|
350
|
+
:target_language => term.target_language,
|
351
|
+
:source_term => term.source_term,
|
352
|
+
:snipped_source_term => snipped_source_term,
|
353
|
+
:target_term => term.target_term,
|
354
|
+
:snipped_target_term => snipped_target_term,
|
355
|
+
:note => term.note || ''}
|
356
|
+
end
|
357
|
+
end
|
358
|
+
|
323
359
|
def struct_snipped_text(snipped_text)
|
360
|
+
return [] if snipped_text.empty?
|
324
361
|
word_list = snipped_text.split(/(<snippet>[^<]*<\/snippet>)/)
|
325
362
|
structed_source_term = word_list.map{|word|
|
326
363
|
replaced_word = word.sub(/<snippet>([^<]*)<\/snippet>/){|match| $1}
|
@@ -333,6 +370,16 @@ module Logaling
|
|
333
370
|
structed_source_term
|
334
371
|
end
|
335
372
|
|
373
|
+
def snip_source_term(term, snippet)
|
374
|
+
snipped_text = snippet.execute(term.source_term).join
|
375
|
+
struct_snipped_text(snipped_text)
|
376
|
+
end
|
377
|
+
|
378
|
+
def snip_target_term(term, snippet)
|
379
|
+
snipped_text = snippet.execute(term.target_term).join
|
380
|
+
struct_snipped_text(snipped_text)
|
381
|
+
end
|
382
|
+
|
336
383
|
def get_config(conf_key)
|
337
384
|
records = Groonga["configurations"].select do |record|
|
338
385
|
record.conf_key == conf_key
|
@@ -342,6 +389,8 @@ module Logaling
|
|
342
389
|
config.conf_value
|
343
390
|
end
|
344
391
|
value.size > 0 ? value[0] : ""
|
392
|
+
ensure
|
393
|
+
records.expression.close
|
345
394
|
end
|
346
395
|
|
347
396
|
def add_config(conf_key, conf_value)
|
data/lib/logaling/repository.rb
CHANGED
@@ -50,22 +50,26 @@ module Logaling
|
|
50
50
|
end
|
51
51
|
end
|
52
52
|
|
53
|
-
def lookup(source_term,
|
53
|
+
def lookup(source_term, glossary_source, dictionary=false)
|
54
54
|
raise GlossaryDBNotFound unless File.exist?(logaling_db_home)
|
55
55
|
|
56
56
|
terms = []
|
57
57
|
Logaling::GlossaryDB.open(logaling_db_home, "utf8") do |db|
|
58
|
-
|
58
|
+
if dictionary
|
59
|
+
terms = db.lookup_dictionary(source_term)
|
60
|
+
else
|
61
|
+
terms = db.lookup(source_term, glossary_source)
|
62
|
+
end
|
59
63
|
end
|
60
64
|
terms
|
61
65
|
end
|
62
66
|
|
63
|
-
def show_glossary(
|
67
|
+
def show_glossary(glossary_source)
|
64
68
|
raise GlossaryDBNotFound unless File.exist?(logaling_db_home)
|
65
69
|
|
66
70
|
terms = []
|
67
71
|
Logaling::GlossaryDB.open(logaling_db_home, "utf8") do |db|
|
68
|
-
terms = db.translation_list(
|
72
|
+
terms = db.translation_list(glossary_source)
|
69
73
|
end
|
70
74
|
terms
|
71
75
|
end
|
data/logaling-command.gemspec
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# -*-
|
1
|
+
# -*- coding: utf-8 -*-
|
2
2
|
#
|
3
3
|
# Copyright (C) 2011 Miho SUZUKI
|
4
4
|
#
|
@@ -16,7 +16,7 @@
|
|
16
16
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
17
|
|
18
18
|
$:.push File.expand_path("../lib", __FILE__)
|
19
|
-
require "logaling/command"
|
19
|
+
require "logaling/command/version"
|
20
20
|
|
21
21
|
Gem::Specification.new do |s|
|
22
22
|
s.name = "logaling-command"
|
@@ -41,7 +41,7 @@ Gem::Specification.new do |s|
|
|
41
41
|
s.add_runtime_dependency 'rroonga', ['>= 1.3.0']
|
42
42
|
s.add_runtime_dependency 'rainbow'
|
43
43
|
s.add_runtime_dependency 'nokogiri'
|
44
|
-
s.add_runtime_dependency '
|
44
|
+
s.add_runtime_dependency 'activesupport'
|
45
45
|
|
46
46
|
s.add_development_dependency 'rake'
|
47
47
|
s.add_development_dependency 'rspec'
|