logaling-command 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGES +18 -0
- data/README.md +5 -5
- data/bin/loga +1 -2
- data/lib/logaling/command/application.rb +383 -0
- data/lib/logaling/command/version.rb +22 -0
- data/lib/logaling/command.rb +2 -357
- data/lib/logaling/config.rb +86 -0
- data/lib/logaling/external_glossaries/freebsd_jpman.rb +38 -0
- data/lib/logaling/external_glossaries/mozilla_japan.rb +48 -0
- data/lib/logaling/glossary.rb +27 -17
- data/lib/logaling/glossary_db.rb +102 -53
- data/lib/logaling/repository.rb +8 -4
- data/logaling-command.gemspec +3 -3
- data/spec/logaling/command_spec.rb +78 -91
- data/spec/logaling/glossary_spec.rb +13 -11
- data/spec/logaling/repository_spec.rb +67 -42
- data/spec/spec_helper.rb +13 -3
- metadata +24 -19
@@ -0,0 +1,48 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# This program is free software: you can redistribute it and/or modify
|
3
|
+
# it under the terms of the GNU General Public License as published by
|
4
|
+
# the Free Software Foundation, either version 3 of the License, or
|
5
|
+
# (at your option) any later version.
|
6
|
+
#
|
7
|
+
# This program is distributed in the hope that it will be useful,
|
8
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
9
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
10
|
+
# GNU General Public License for more details.
|
11
|
+
#
|
12
|
+
# You should have received a copy of the GNU General Public License
|
13
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
14
|
+
|
15
|
+
require 'open-uri'
|
16
|
+
require 'nokogiri'
|
17
|
+
|
18
|
+
module Logaling
|
19
|
+
class MozillaJapan < ExternalGlossary
|
20
|
+
description 'Mozilla Japan (http://www.mozilla-japan.org/jp/l10n/term/l10n.html)'
|
21
|
+
source_language 'en'
|
22
|
+
target_language 'ja'
|
23
|
+
output_format 'csv'
|
24
|
+
|
25
|
+
private
|
26
|
+
def convert_to_csv(csv)
|
27
|
+
url = 'http://www.mozilla-japan.org/jp/l10n/term/l10n.html'
|
28
|
+
doc = ::Nokogiri::HTML(open(url, "r"))
|
29
|
+
doc.encoding = "UTF-8"
|
30
|
+
doc.search(".obsolete").remove
|
31
|
+
doc.search(".relate").remove
|
32
|
+
doc.css("dl[@class='terminology en-ja']").each do |dl|
|
33
|
+
dl.children.each_slice(2) do |dt, dd|
|
34
|
+
dd.text.split("|").each do |ddt|
|
35
|
+
ddt = ddt.gsub(/\s/, '')
|
36
|
+
unless ddt.empty?
|
37
|
+
csv << [dust_to_tilda(dt.text), dust_to_tilda(ddt)]
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
def dust_to_tilda(txt)
|
45
|
+
txt.gsub("\xEF\xBF\xBD", "\xE3\x80\x9C")
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
data/lib/logaling/glossary.rb
CHANGED
@@ -15,7 +15,12 @@
|
|
15
15
|
# You should have received a copy of the GNU General Public License
|
16
16
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
17
|
|
18
|
-
|
18
|
+
begin
|
19
|
+
require 'psych'
|
20
|
+
rescue LoadError => e
|
21
|
+
raise LoadError unless e.message =~ /psych/
|
22
|
+
puts "please install psych first."
|
23
|
+
end
|
19
24
|
require "yaml"
|
20
25
|
require "csv"
|
21
26
|
require "fileutils"
|
@@ -55,32 +60,28 @@ module Logaling
|
|
55
60
|
end
|
56
61
|
glossary
|
57
62
|
end
|
58
|
-
|
59
|
-
def build_path(glossary, source_language, target_language)
|
60
|
-
fname = [glossary, source_language, target_language].join(".")
|
61
|
-
File.join(LOGALING_HOME, "projects", glossary, "glossary", "#{fname}.yml")
|
62
|
-
end
|
63
63
|
end
|
64
|
+
attr_reader :glossary, :source_language, :target_language
|
64
65
|
|
65
|
-
def initialize(glossary, source_language, target_language)
|
66
|
-
@
|
66
|
+
def initialize(glossary, source_language, target_language, logaling_home)
|
67
|
+
@logaling_home = logaling_home
|
67
68
|
@glossary = glossary
|
68
69
|
@source_language = source_language
|
69
70
|
@target_language = target_language
|
70
71
|
end
|
71
72
|
|
72
73
|
def add(source_term, target_term, note)
|
73
|
-
FileUtils.touch(
|
74
|
+
FileUtils.touch(source_path) unless File.exist?(source_path)
|
74
75
|
|
75
|
-
glossary = Glossary.load_glossary(
|
76
|
+
glossary = Glossary.load_glossary(source_path)
|
76
77
|
glossary << build_term(source_term, target_term, note)
|
77
78
|
dump_glossary(glossary)
|
78
79
|
end
|
79
80
|
|
80
81
|
def update(source_term, target_term, new_target_term, note)
|
81
|
-
raise GlossaryNotFound unless File.exist?(
|
82
|
+
raise GlossaryNotFound unless File.exist?(source_path)
|
82
83
|
|
83
|
-
glossary = Glossary.load_glossary(
|
84
|
+
glossary = Glossary.load_glossary(source_path)
|
84
85
|
|
85
86
|
target_index = find_term_index(glossary, source_term, target_term)
|
86
87
|
if target_index
|
@@ -92,9 +93,9 @@ module Logaling
|
|
92
93
|
end
|
93
94
|
|
94
95
|
def delete(source_term, target_term)
|
95
|
-
raise GlossaryNotFound unless File.exist?(
|
96
|
+
raise GlossaryNotFound unless File.exist?(source_path)
|
96
97
|
|
97
|
-
glossary = Glossary.load_glossary(
|
98
|
+
glossary = Glossary.load_glossary(source_path)
|
98
99
|
target_index = find_term_index(glossary, source_term, target_term)
|
99
100
|
unless target_index
|
100
101
|
raise TermError, "Can't found term '#{source_term} #{target_term}' in '#{@glossary}'" unless target_index
|
@@ -105,9 +106,9 @@ module Logaling
|
|
105
106
|
end
|
106
107
|
|
107
108
|
def delete_all(source_term, force=false)
|
108
|
-
raise GlossaryNotFound unless File.exist?(
|
109
|
+
raise GlossaryNotFound unless File.exist?(source_path)
|
109
110
|
|
110
|
-
glossary = Glossary.load_glossary(
|
111
|
+
glossary = Glossary.load_glossary(source_path)
|
111
112
|
delete_candidates = target_terms(glossary, source_term)
|
112
113
|
if delete_candidates.empty?
|
113
114
|
raise TermError, "Can't found term '#{source_term} in '#{@glossary}'"
|
@@ -123,6 +124,15 @@ module Logaling
|
|
123
124
|
end
|
124
125
|
end
|
125
126
|
|
127
|
+
def source_path
|
128
|
+
if @source_path
|
129
|
+
@source_path
|
130
|
+
else
|
131
|
+
fname = [@glossary, @source_language, @target_language].join(".")
|
132
|
+
@source_path = File.join(@logaling_home, "projects", @glossary, "glossary", "#{fname}.yml")
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
126
136
|
private
|
127
137
|
def build_term(source_term, target_term, note)
|
128
138
|
note ||= ''
|
@@ -152,7 +162,7 @@ module Logaling
|
|
152
162
|
end
|
153
163
|
|
154
164
|
def dump_glossary(glossary)
|
155
|
-
File.open(
|
165
|
+
File.open(source_path, "w") do |f|
|
156
166
|
f.puts(glossary.to_yaml)
|
157
167
|
end
|
158
168
|
end
|
data/lib/logaling/glossary_db.rb
CHANGED
@@ -81,18 +81,22 @@ module Logaling
|
|
81
81
|
end
|
82
82
|
end
|
83
83
|
|
84
|
-
def lookup(source_term,
|
84
|
+
def lookup(source_term, glossary_source=nil)
|
85
85
|
records_selected = Groonga["translations"].select do |record|
|
86
86
|
conditions = [record.source_term =~ source_term]
|
87
|
-
|
88
|
-
|
87
|
+
if glossary_source
|
88
|
+
conditions << (record.source_language =~ glossary_source.source_language) if glossary_source.source_language
|
89
|
+
conditions << (record.target_language =~ glossary_source.target_language) if glossary_source.target_language
|
90
|
+
end
|
89
91
|
conditions
|
90
92
|
end
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
93
|
+
if glossary_source
|
94
|
+
specified_glossary = records_selected.select do |record|
|
95
|
+
record.glossary == glossary_source.glossary
|
96
|
+
end
|
97
|
+
specified_glossary.each do |record|
|
98
|
+
record.key._score += 10
|
99
|
+
end
|
96
100
|
end
|
97
101
|
records = records_selected.sort([
|
98
102
|
{:key=>"_score", :order=>'descending'},
|
@@ -104,27 +108,55 @@ module Logaling
|
|
104
108
|
:html_escape => true,
|
105
109
|
:normalize => true}
|
106
110
|
snippet = records_selected.expression.snippet(["<snippet>", "</snippet>"], options)
|
111
|
+
struct_result(records, snippet)
|
112
|
+
ensure
|
113
|
+
snippet.close if snippet
|
114
|
+
records_selected.expression.close if records_selected
|
115
|
+
specified_glossary.expression.close if specified_glossary
|
116
|
+
end
|
107
117
|
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
118
|
+
|
119
|
+
def lookup_dictionary(search_word)
|
120
|
+
records_selected_source = Groonga["translations"].select do |record|
|
121
|
+
target = record.match_target do |match_record|
|
122
|
+
match_record.source_term * 2
|
123
|
+
end
|
124
|
+
target =~ search_word
|
125
|
+
end
|
126
|
+
completely_match = records_selected_source.select do |record|
|
127
|
+
record.source_term == search_word
|
128
|
+
end
|
129
|
+
completely_match.each do |record|
|
130
|
+
record.key._score += 10
|
119
131
|
end
|
132
|
+
|
133
|
+
records_selected_target = Groonga["translations"].select do |record|
|
134
|
+
record.target_term =~ search_word
|
135
|
+
end
|
136
|
+
|
137
|
+
records_selected = records_selected_target.union!(records_selected_source)
|
138
|
+
records = records_selected.sort([
|
139
|
+
{:key=>"_score", :order=>'descending'},
|
140
|
+
{:key=>"source_term", :order=>'ascending'},
|
141
|
+
{:key=>"target_term", :order=>'ascending'}])
|
142
|
+
|
143
|
+
options = {:width => 100,
|
144
|
+
:html_escape => true,
|
145
|
+
:normalize => true}
|
146
|
+
snippet = records_selected.expression.snippet(["<snippet>", "</snippet>"], options)
|
147
|
+
|
148
|
+
struct_result(records, snippet)
|
149
|
+
ensure
|
150
|
+
snippet.close if snippet
|
151
|
+
records_selected.expression.close if records_selected
|
120
152
|
end
|
121
153
|
|
122
|
-
def translation_list(
|
154
|
+
def translation_list(glossary_source)
|
123
155
|
records_raw = Groonga["translations"].select do |record|
|
124
156
|
[
|
125
|
-
record.glossary == glossary,
|
126
|
-
record.source_language == source_language,
|
127
|
-
record.target_language == target_language
|
157
|
+
record.glossary == glossary_source.glossary,
|
158
|
+
record.source_language == glossary_source.source_language,
|
159
|
+
record.target_language == glossary_source.target_language
|
128
160
|
]
|
129
161
|
end
|
130
162
|
|
@@ -132,16 +164,9 @@ module Logaling
|
|
132
164
|
{:key=>"source_term", :order=>'ascending'},
|
133
165
|
{:key=>"target_term", :order=>'ascending'}])
|
134
166
|
|
135
|
-
records
|
136
|
-
|
137
|
-
|
138
|
-
{:glossary_name => term.glossary.key,
|
139
|
-
:source_language => term.source_language,
|
140
|
-
:target_language => term.target_language,
|
141
|
-
:source_term => term.source_term,
|
142
|
-
:target_term => term.target_term,
|
143
|
-
:note => term.note || ''}
|
144
|
-
end
|
167
|
+
struct_result(records)
|
168
|
+
ensure
|
169
|
+
records_raw.expression.close
|
145
170
|
end
|
146
171
|
|
147
172
|
def get_bilingual_pair(source_term, target_term, glossary)
|
@@ -153,16 +178,9 @@ module Logaling
|
|
153
178
|
]
|
154
179
|
end
|
155
180
|
|
156
|
-
records
|
157
|
-
|
158
|
-
|
159
|
-
{:glossary_name => term.glossary,
|
160
|
-
:source_language => term.source_language,
|
161
|
-
:target_language => term.target_language,
|
162
|
-
:source_term => term.source_term,
|
163
|
-
:target_term => term.target_term,
|
164
|
-
:note => term.note || ''}
|
165
|
-
end
|
181
|
+
struct_result(records)
|
182
|
+
ensure
|
183
|
+
records.expression.close
|
166
184
|
end
|
167
185
|
|
168
186
|
def get_bilingual_pair_with_note(source_term, target_term, note, glossary)
|
@@ -175,16 +193,9 @@ module Logaling
|
|
175
193
|
]
|
176
194
|
end
|
177
195
|
|
178
|
-
records
|
179
|
-
|
180
|
-
|
181
|
-
{:glossary_name => term.glossary,
|
182
|
-
:source_language => term.source_language,
|
183
|
-
:target_language => term.target_language,
|
184
|
-
:source_term => term.source_term,
|
185
|
-
:target_term => term.target_term,
|
186
|
-
:note => term.note || ''}
|
187
|
-
end
|
196
|
+
struct_result(records)
|
197
|
+
ensure
|
198
|
+
records.expression.close
|
188
199
|
end
|
189
200
|
|
190
201
|
def glossary_source_exist?(glossary_source, indexed_at)
|
@@ -195,6 +206,8 @@ module Logaling
|
|
195
206
|
]
|
196
207
|
end
|
197
208
|
!glossary.size.zero?
|
209
|
+
ensure
|
210
|
+
glossary.expression.close
|
198
211
|
end
|
199
212
|
|
200
213
|
def get_all_glossary_source
|
@@ -218,6 +231,8 @@ module Logaling
|
|
218
231
|
records.each do |record|
|
219
232
|
record.key.delete
|
220
233
|
end
|
234
|
+
ensure
|
235
|
+
records.expression.close
|
221
236
|
end
|
222
237
|
|
223
238
|
def add_glossary_source(glossary_source, indexed_at)
|
@@ -232,6 +247,8 @@ module Logaling
|
|
232
247
|
records.each do |record|
|
233
248
|
record.key.delete
|
234
249
|
end
|
250
|
+
ensure
|
251
|
+
records.expression.close
|
235
252
|
end
|
236
253
|
|
237
254
|
def add_glossary(glossary_name)
|
@@ -246,6 +263,8 @@ module Logaling
|
|
246
263
|
records.each do |record|
|
247
264
|
record.key.delete
|
248
265
|
end
|
266
|
+
ensure
|
267
|
+
records.expression.close
|
249
268
|
end
|
250
269
|
|
251
270
|
def add_translation(glossary_name, glossary_source, source_language, target_language, source_term, target_term, note)
|
@@ -302,6 +321,7 @@ module Logaling
|
|
302
321
|
:key_normalize => true,
|
303
322
|
:default_tokenizer => "TokenBigram") do |table|
|
304
323
|
table.index("translations.source_term")
|
324
|
+
table.index("translations.target_term")
|
305
325
|
end
|
306
326
|
end
|
307
327
|
end
|
@@ -320,7 +340,24 @@ module Logaling
|
|
320
340
|
@database.nil? or @database.closed?
|
321
341
|
end
|
322
342
|
|
343
|
+
def struct_result(records, snippet=nil)
|
344
|
+
records.map do |record|
|
345
|
+
term = record.key
|
346
|
+
snipped_source_term = snippet ? snip_source_term(term, snippet) : []
|
347
|
+
snipped_target_term = snippet ? snip_target_term(term, snippet) : []
|
348
|
+
{:glossary_name => term.glossary.key,
|
349
|
+
:source_language => term.source_language,
|
350
|
+
:target_language => term.target_language,
|
351
|
+
:source_term => term.source_term,
|
352
|
+
:snipped_source_term => snipped_source_term,
|
353
|
+
:target_term => term.target_term,
|
354
|
+
:snipped_target_term => snipped_target_term,
|
355
|
+
:note => term.note || ''}
|
356
|
+
end
|
357
|
+
end
|
358
|
+
|
323
359
|
def struct_snipped_text(snipped_text)
|
360
|
+
return [] if snipped_text.empty?
|
324
361
|
word_list = snipped_text.split(/(<snippet>[^<]*<\/snippet>)/)
|
325
362
|
structed_source_term = word_list.map{|word|
|
326
363
|
replaced_word = word.sub(/<snippet>([^<]*)<\/snippet>/){|match| $1}
|
@@ -333,6 +370,16 @@ module Logaling
|
|
333
370
|
structed_source_term
|
334
371
|
end
|
335
372
|
|
373
|
+
def snip_source_term(term, snippet)
|
374
|
+
snipped_text = snippet.execute(term.source_term).join
|
375
|
+
struct_snipped_text(snipped_text)
|
376
|
+
end
|
377
|
+
|
378
|
+
def snip_target_term(term, snippet)
|
379
|
+
snipped_text = snippet.execute(term.target_term).join
|
380
|
+
struct_snipped_text(snipped_text)
|
381
|
+
end
|
382
|
+
|
336
383
|
def get_config(conf_key)
|
337
384
|
records = Groonga["configurations"].select do |record|
|
338
385
|
record.conf_key == conf_key
|
@@ -342,6 +389,8 @@ module Logaling
|
|
342
389
|
config.conf_value
|
343
390
|
end
|
344
391
|
value.size > 0 ? value[0] : ""
|
392
|
+
ensure
|
393
|
+
records.expression.close
|
345
394
|
end
|
346
395
|
|
347
396
|
def add_config(conf_key, conf_value)
|
data/lib/logaling/repository.rb
CHANGED
@@ -50,22 +50,26 @@ module Logaling
|
|
50
50
|
end
|
51
51
|
end
|
52
52
|
|
53
|
-
def lookup(source_term,
|
53
|
+
def lookup(source_term, glossary_source, dictionary=false)
|
54
54
|
raise GlossaryDBNotFound unless File.exist?(logaling_db_home)
|
55
55
|
|
56
56
|
terms = []
|
57
57
|
Logaling::GlossaryDB.open(logaling_db_home, "utf8") do |db|
|
58
|
-
|
58
|
+
if dictionary
|
59
|
+
terms = db.lookup_dictionary(source_term)
|
60
|
+
else
|
61
|
+
terms = db.lookup(source_term, glossary_source)
|
62
|
+
end
|
59
63
|
end
|
60
64
|
terms
|
61
65
|
end
|
62
66
|
|
63
|
-
def show_glossary(
|
67
|
+
def show_glossary(glossary_source)
|
64
68
|
raise GlossaryDBNotFound unless File.exist?(logaling_db_home)
|
65
69
|
|
66
70
|
terms = []
|
67
71
|
Logaling::GlossaryDB.open(logaling_db_home, "utf8") do |db|
|
68
|
-
terms = db.translation_list(
|
72
|
+
terms = db.translation_list(glossary_source)
|
69
73
|
end
|
70
74
|
terms
|
71
75
|
end
|
data/logaling-command.gemspec
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# -*-
|
1
|
+
# -*- coding: utf-8 -*-
|
2
2
|
#
|
3
3
|
# Copyright (C) 2011 Miho SUZUKI
|
4
4
|
#
|
@@ -16,7 +16,7 @@
|
|
16
16
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
17
|
|
18
18
|
$:.push File.expand_path("../lib", __FILE__)
|
19
|
-
require "logaling/command"
|
19
|
+
require "logaling/command/version"
|
20
20
|
|
21
21
|
Gem::Specification.new do |s|
|
22
22
|
s.name = "logaling-command"
|
@@ -41,7 +41,7 @@ Gem::Specification.new do |s|
|
|
41
41
|
s.add_runtime_dependency 'rroonga', ['>= 1.3.0']
|
42
42
|
s.add_runtime_dependency 'rainbow'
|
43
43
|
s.add_runtime_dependency 'nokogiri'
|
44
|
-
s.add_runtime_dependency '
|
44
|
+
s.add_runtime_dependency 'activesupport'
|
45
45
|
|
46
46
|
s.add_development_dependency 'rake'
|
47
47
|
s.add_development_dependency 'rspec'
|