logaling-command 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,48 @@
1
+ # -*- coding: utf-8 -*-
2
+ # This program is free software: you can redistribute it and/or modify
3
+ # it under the terms of the GNU General Public License as published by
4
+ # the Free Software Foundation, either version 3 of the License, or
5
+ # (at your option) any later version.
6
+ #
7
+ # This program is distributed in the hope that it will be useful,
8
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
9
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
+ # GNU General Public License for more details.
11
+ #
12
+ # You should have received a copy of the GNU General Public License
13
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
14
+
15
+ require 'open-uri'
16
+ require 'nokogiri'
17
+
18
+ module Logaling
19
+ class MozillaJapan < ExternalGlossary
20
+ description 'Mozilla Japan (http://www.mozilla-japan.org/jp/l10n/term/l10n.html)'
21
+ source_language 'en'
22
+ target_language 'ja'
23
+ output_format 'csv'
24
+
25
+ private
26
+ def convert_to_csv(csv)
27
+ url = 'http://www.mozilla-japan.org/jp/l10n/term/l10n.html'
28
+ doc = ::Nokogiri::HTML(open(url, "r"))
29
+ doc.encoding = "UTF-8"
30
+ doc.search(".obsolete").remove
31
+ doc.search(".relate").remove
32
+ doc.css("dl[@class='terminology en-ja']").each do |dl|
33
+ dl.children.each_slice(2) do |dt, dd|
34
+ dd.text.split("|").each do |ddt|
35
+ ddt = ddt.gsub(/\s/, '')
36
+ unless ddt.empty?
37
+ csv << [dust_to_tilda(dt.text), dust_to_tilda(ddt)]
38
+ end
39
+ end
40
+ end
41
+ end
42
+ end
43
+
44
+ def dust_to_tilda(txt)
45
+ txt.gsub("\xEF\xBF\xBD", "\xE3\x80\x9C")
46
+ end
47
+ end
48
+ end
@@ -15,7 +15,12 @@
15
15
  # You should have received a copy of the GNU General Public License
16
16
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
17
 
18
- require 'psych'
18
+ begin
19
+ require 'psych'
20
+ rescue LoadError => e
21
+ raise LoadError unless e.message =~ /psych/
22
+ puts "please install psych first."
23
+ end
19
24
  require "yaml"
20
25
  require "csv"
21
26
  require "fileutils"
@@ -55,32 +60,28 @@ module Logaling
55
60
  end
56
61
  glossary
57
62
  end
58
-
59
- def build_path(glossary, source_language, target_language)
60
- fname = [glossary, source_language, target_language].join(".")
61
- File.join(LOGALING_HOME, "projects", glossary, "glossary", "#{fname}.yml")
62
- end
63
63
  end
64
+ attr_reader :glossary, :source_language, :target_language
64
65
 
65
- def initialize(glossary, source_language, target_language)
66
- @path = Glossary.build_path(glossary, source_language, target_language)
66
+ def initialize(glossary, source_language, target_language, logaling_home)
67
+ @logaling_home = logaling_home
67
68
  @glossary = glossary
68
69
  @source_language = source_language
69
70
  @target_language = target_language
70
71
  end
71
72
 
72
73
  def add(source_term, target_term, note)
73
- FileUtils.touch(@path) unless File.exist?(@path)
74
+ FileUtils.touch(source_path) unless File.exist?(source_path)
74
75
 
75
- glossary = Glossary.load_glossary(@path)
76
+ glossary = Glossary.load_glossary(source_path)
76
77
  glossary << build_term(source_term, target_term, note)
77
78
  dump_glossary(glossary)
78
79
  end
79
80
 
80
81
  def update(source_term, target_term, new_target_term, note)
81
- raise GlossaryNotFound unless File.exist?(@path)
82
+ raise GlossaryNotFound unless File.exist?(source_path)
82
83
 
83
- glossary = Glossary.load_glossary(@path)
84
+ glossary = Glossary.load_glossary(source_path)
84
85
 
85
86
  target_index = find_term_index(glossary, source_term, target_term)
86
87
  if target_index
@@ -92,9 +93,9 @@ module Logaling
92
93
  end
93
94
 
94
95
  def delete(source_term, target_term)
95
- raise GlossaryNotFound unless File.exist?(@path)
96
+ raise GlossaryNotFound unless File.exist?(source_path)
96
97
 
97
- glossary = Glossary.load_glossary(@path)
98
+ glossary = Glossary.load_glossary(source_path)
98
99
  target_index = find_term_index(glossary, source_term, target_term)
99
100
  unless target_index
100
101
  raise TermError, "Can't found term '#{source_term} #{target_term}' in '#{@glossary}'" unless target_index
@@ -105,9 +106,9 @@ module Logaling
105
106
  end
106
107
 
107
108
  def delete_all(source_term, force=false)
108
- raise GlossaryNotFound unless File.exist?(@path)
109
+ raise GlossaryNotFound unless File.exist?(source_path)
109
110
 
110
- glossary = Glossary.load_glossary(@path)
111
+ glossary = Glossary.load_glossary(source_path)
111
112
  delete_candidates = target_terms(glossary, source_term)
112
113
  if delete_candidates.empty?
113
114
  raise TermError, "Can't found term '#{source_term} in '#{@glossary}'"
@@ -123,6 +124,15 @@ module Logaling
123
124
  end
124
125
  end
125
126
 
127
+ def source_path
128
+ if @source_path
129
+ @source_path
130
+ else
131
+ fname = [@glossary, @source_language, @target_language].join(".")
132
+ @source_path = File.join(@logaling_home, "projects", @glossary, "glossary", "#{fname}.yml")
133
+ end
134
+ end
135
+
126
136
  private
127
137
  def build_term(source_term, target_term, note)
128
138
  note ||= ''
@@ -152,7 +162,7 @@ module Logaling
152
162
  end
153
163
 
154
164
  def dump_glossary(glossary)
155
- File.open(@path, "w") do |f|
165
+ File.open(source_path, "w") do |f|
156
166
  f.puts(glossary.to_yaml)
157
167
  end
158
168
  end
@@ -81,18 +81,22 @@ module Logaling
81
81
  end
82
82
  end
83
83
 
84
- def lookup(source_term, source_language, target_language, glossary)
84
+ def lookup(source_term, glossary_source=nil)
85
85
  records_selected = Groonga["translations"].select do |record|
86
86
  conditions = [record.source_term =~ source_term]
87
- conditions << (record.source_language =~ source_language) if source_language
88
- conditions << (record.target_language =~ target_language) if target_language
87
+ if glossary_source
88
+ conditions << (record.source_language =~ glossary_source.source_language) if glossary_source.source_language
89
+ conditions << (record.target_language =~ glossary_source.target_language) if glossary_source.target_language
90
+ end
89
91
  conditions
90
92
  end
91
- specified_glossary = records_selected.select do |record|
92
- record.glossary == glossary
93
- end
94
- specified_glossary.each do |record|
95
- record.key._score += 10
93
+ if glossary_source
94
+ specified_glossary = records_selected.select do |record|
95
+ record.glossary == glossary_source.glossary
96
+ end
97
+ specified_glossary.each do |record|
98
+ record.key._score += 10
99
+ end
96
100
  end
97
101
  records = records_selected.sort([
98
102
  {:key=>"_score", :order=>'descending'},
@@ -104,27 +108,55 @@ module Logaling
104
108
  :html_escape => true,
105
109
  :normalize => true}
106
110
  snippet = records_selected.expression.snippet(["<snippet>", "</snippet>"], options)
111
+ struct_result(records, snippet)
112
+ ensure
113
+ snippet.close if snippet
114
+ records_selected.expression.close if records_selected
115
+ specified_glossary.expression.close if specified_glossary
116
+ end
107
117
 
108
- snipped_source_term = []
109
- records.map do |record|
110
- term = record.key
111
- snipped_text = snippet.execute(term.source_term).join
112
- {:glossary_name => term.glossary.key,
113
- :source_language => term.source_language,
114
- :target_language => term.target_language,
115
- :source_term => term.source_term,
116
- :snipped_source_term => struct_snipped_text(snipped_text),
117
- :target_term => term.target_term,
118
- :note => term.note || ''}
118
+
119
+ def lookup_dictionary(search_word)
120
+ records_selected_source = Groonga["translations"].select do |record|
121
+ target = record.match_target do |match_record|
122
+ match_record.source_term * 2
123
+ end
124
+ target =~ search_word
125
+ end
126
+ completely_match = records_selected_source.select do |record|
127
+ record.source_term == search_word
128
+ end
129
+ completely_match.each do |record|
130
+ record.key._score += 10
119
131
  end
132
+
133
+ records_selected_target = Groonga["translations"].select do |record|
134
+ record.target_term =~ search_word
135
+ end
136
+
137
+ records_selected = records_selected_target.union!(records_selected_source)
138
+ records = records_selected.sort([
139
+ {:key=>"_score", :order=>'descending'},
140
+ {:key=>"source_term", :order=>'ascending'},
141
+ {:key=>"target_term", :order=>'ascending'}])
142
+
143
+ options = {:width => 100,
144
+ :html_escape => true,
145
+ :normalize => true}
146
+ snippet = records_selected.expression.snippet(["<snippet>", "</snippet>"], options)
147
+
148
+ struct_result(records, snippet)
149
+ ensure
150
+ snippet.close if snippet
151
+ records_selected.expression.close if records_selected
120
152
  end
121
153
 
122
- def translation_list(glossary, source_language, target_language)
154
+ def translation_list(glossary_source)
123
155
  records_raw = Groonga["translations"].select do |record|
124
156
  [
125
- record.glossary == glossary,
126
- record.source_language == source_language,
127
- record.target_language == target_language
157
+ record.glossary == glossary_source.glossary,
158
+ record.source_language == glossary_source.source_language,
159
+ record.target_language == glossary_source.target_language
128
160
  ]
129
161
  end
130
162
 
@@ -132,16 +164,9 @@ module Logaling
132
164
  {:key=>"source_term", :order=>'ascending'},
133
165
  {:key=>"target_term", :order=>'ascending'}])
134
166
 
135
- records.map do |record|
136
- term = record.key
137
-
138
- {:glossary_name => term.glossary.key,
139
- :source_language => term.source_language,
140
- :target_language => term.target_language,
141
- :source_term => term.source_term,
142
- :target_term => term.target_term,
143
- :note => term.note || ''}
144
- end
167
+ struct_result(records)
168
+ ensure
169
+ records_raw.expression.close
145
170
  end
146
171
 
147
172
  def get_bilingual_pair(source_term, target_term, glossary)
@@ -153,16 +178,9 @@ module Logaling
153
178
  ]
154
179
  end
155
180
 
156
- records.map do |record|
157
- term = record.key
158
-
159
- {:glossary_name => term.glossary,
160
- :source_language => term.source_language,
161
- :target_language => term.target_language,
162
- :source_term => term.source_term,
163
- :target_term => term.target_term,
164
- :note => term.note || ''}
165
- end
181
+ struct_result(records)
182
+ ensure
183
+ records.expression.close
166
184
  end
167
185
 
168
186
  def get_bilingual_pair_with_note(source_term, target_term, note, glossary)
@@ -175,16 +193,9 @@ module Logaling
175
193
  ]
176
194
  end
177
195
 
178
- records.map do |record|
179
- term = record.key
180
-
181
- {:glossary_name => term.glossary,
182
- :source_language => term.source_language,
183
- :target_language => term.target_language,
184
- :source_term => term.source_term,
185
- :target_term => term.target_term,
186
- :note => term.note || ''}
187
- end
196
+ struct_result(records)
197
+ ensure
198
+ records.expression.close
188
199
  end
189
200
 
190
201
  def glossary_source_exist?(glossary_source, indexed_at)
@@ -195,6 +206,8 @@ module Logaling
195
206
  ]
196
207
  end
197
208
  !glossary.size.zero?
209
+ ensure
210
+ glossary.expression.close
198
211
  end
199
212
 
200
213
  def get_all_glossary_source
@@ -218,6 +231,8 @@ module Logaling
218
231
  records.each do |record|
219
232
  record.key.delete
220
233
  end
234
+ ensure
235
+ records.expression.close
221
236
  end
222
237
 
223
238
  def add_glossary_source(glossary_source, indexed_at)
@@ -232,6 +247,8 @@ module Logaling
232
247
  records.each do |record|
233
248
  record.key.delete
234
249
  end
250
+ ensure
251
+ records.expression.close
235
252
  end
236
253
 
237
254
  def add_glossary(glossary_name)
@@ -246,6 +263,8 @@ module Logaling
246
263
  records.each do |record|
247
264
  record.key.delete
248
265
  end
266
+ ensure
267
+ records.expression.close
249
268
  end
250
269
 
251
270
  def add_translation(glossary_name, glossary_source, source_language, target_language, source_term, target_term, note)
@@ -302,6 +321,7 @@ module Logaling
302
321
  :key_normalize => true,
303
322
  :default_tokenizer => "TokenBigram") do |table|
304
323
  table.index("translations.source_term")
324
+ table.index("translations.target_term")
305
325
  end
306
326
  end
307
327
  end
@@ -320,7 +340,24 @@ module Logaling
320
340
  @database.nil? or @database.closed?
321
341
  end
322
342
 
343
+ def struct_result(records, snippet=nil)
344
+ records.map do |record|
345
+ term = record.key
346
+ snipped_source_term = snippet ? snip_source_term(term, snippet) : []
347
+ snipped_target_term = snippet ? snip_target_term(term, snippet) : []
348
+ {:glossary_name => term.glossary.key,
349
+ :source_language => term.source_language,
350
+ :target_language => term.target_language,
351
+ :source_term => term.source_term,
352
+ :snipped_source_term => snipped_source_term,
353
+ :target_term => term.target_term,
354
+ :snipped_target_term => snipped_target_term,
355
+ :note => term.note || ''}
356
+ end
357
+ end
358
+
323
359
  def struct_snipped_text(snipped_text)
360
+ return [] if snipped_text.empty?
324
361
  word_list = snipped_text.split(/(<snippet>[^<]*<\/snippet>)/)
325
362
  structed_source_term = word_list.map{|word|
326
363
  replaced_word = word.sub(/<snippet>([^<]*)<\/snippet>/){|match| $1}
@@ -333,6 +370,16 @@ module Logaling
333
370
  structed_source_term
334
371
  end
335
372
 
373
+ def snip_source_term(term, snippet)
374
+ snipped_text = snippet.execute(term.source_term).join
375
+ struct_snipped_text(snipped_text)
376
+ end
377
+
378
+ def snip_target_term(term, snippet)
379
+ snipped_text = snippet.execute(term.target_term).join
380
+ struct_snipped_text(snipped_text)
381
+ end
382
+
336
383
  def get_config(conf_key)
337
384
  records = Groonga["configurations"].select do |record|
338
385
  record.conf_key == conf_key
@@ -342,6 +389,8 @@ module Logaling
342
389
  config.conf_value
343
390
  end
344
391
  value.size > 0 ? value[0] : ""
392
+ ensure
393
+ records.expression.close
345
394
  end
346
395
 
347
396
  def add_config(conf_key, conf_value)
@@ -50,22 +50,26 @@ module Logaling
50
50
  end
51
51
  end
52
52
 
53
- def lookup(source_term, source_language, target_language, glossary)
53
+ def lookup(source_term, glossary_source, dictionary=false)
54
54
  raise GlossaryDBNotFound unless File.exist?(logaling_db_home)
55
55
 
56
56
  terms = []
57
57
  Logaling::GlossaryDB.open(logaling_db_home, "utf8") do |db|
58
- terms = db.lookup(source_term, source_language, target_language, glossary)
58
+ if dictionary
59
+ terms = db.lookup_dictionary(source_term)
60
+ else
61
+ terms = db.lookup(source_term, glossary_source)
62
+ end
59
63
  end
60
64
  terms
61
65
  end
62
66
 
63
- def show_glossary(glossary, source_language, target_language)
67
+ def show_glossary(glossary_source)
64
68
  raise GlossaryDBNotFound unless File.exist?(logaling_db_home)
65
69
 
66
70
  terms = []
67
71
  Logaling::GlossaryDB.open(logaling_db_home, "utf8") do |db|
68
- terms = db.translation_list(glossary, source_language, target_language)
72
+ terms = db.translation_list(glossary_source)
69
73
  end
70
74
  terms
71
75
  end
@@ -1,4 +1,4 @@
1
- # -*- encoding: utf-8 -*-
1
+ # -*- coding: utf-8 -*-
2
2
  #
3
3
  # Copyright (C) 2011 Miho SUZUKI
4
4
  #
@@ -16,7 +16,7 @@
16
16
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
17
 
18
18
  $:.push File.expand_path("../lib", __FILE__)
19
- require "logaling/command"
19
+ require "logaling/command/version"
20
20
 
21
21
  Gem::Specification.new do |s|
22
22
  s.name = "logaling-command"
@@ -41,7 +41,7 @@ Gem::Specification.new do |s|
41
41
  s.add_runtime_dependency 'rroonga', ['>= 1.3.0']
42
42
  s.add_runtime_dependency 'rainbow'
43
43
  s.add_runtime_dependency 'nokogiri'
44
- s.add_runtime_dependency 'active_support'
44
+ s.add_runtime_dependency 'activesupport'
45
45
 
46
46
  s.add_development_dependency 'rake'
47
47
  s.add_development_dependency 'rspec'