logaling-command 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,48 @@
1
+ # -*- coding: utf-8 -*-
2
+ # This program is free software: you can redistribute it and/or modify
3
+ # it under the terms of the GNU General Public License as published by
4
+ # the Free Software Foundation, either version 3 of the License, or
5
+ # (at your option) any later version.
6
+ #
7
+ # This program is distributed in the hope that it will be useful,
8
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
9
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
+ # GNU General Public License for more details.
11
+ #
12
+ # You should have received a copy of the GNU General Public License
13
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
14
+
15
+ require 'open-uri'
16
+ require 'nokogiri'
17
+
18
module Logaling
  # External glossary importer for the Mozilla Japan L10N terminology page.
  #
  # Declares an English -> Japanese glossary whose output format is CSV and
  # fills it by scraping the terminology list from the page named in the
  # description.
  class MozillaJapan < ExternalGlossary
    description 'Mozilla Japan (http://www.mozilla-japan.org/jp/l10n/term/l10n.html)'
    source_language 'en'
    target_language 'ja'
    output_format 'csv'

    private
    # Downloads the terminology page and appends one [source, target] row to
    # +csv+ for every translation found.
    #
    # Nodes carrying the "obsolete" or "relate" class are dropped before
    # extraction. The text of a <dd> may hold several translations separated
    # by "|"; each one has all whitespace removed and, when non-empty, becomes
    # its own row.
    #
    # csv:: receiver of the rows; only #<< is called on it.
    def convert_to_csv(csv)
      url = 'http://www.mozilla-japan.org/jp/l10n/term/l10n.html'
      doc = ::Nokogiri::HTML(open_url(url))
      doc.encoding = "UTF-8"
      doc.search(".obsolete").remove
      doc.search(".relate").remove
      doc.css("dl[@class='terminology en-ja']").each do |dl|
        # Pair element nodes only, so a stray text or comment node inside the
        # list cannot shift the <dt>/<dd> pairing.
        dl.element_children.each_slice(2) do |dt, dd|
          # An odd number of elements leaves a final term without a
          # translation; there is nothing to emit for it.
          next unless dd

          dd.text.split("|").each do |ddt|
            ddt = ddt.gsub(/\s/, '')
            unless ddt.empty?
              csv << [dust_to_tilda(dt.text), dust_to_tilda(ddt)]
            end
          end
        end
      end
    end

    # Opens +url+ for reading through open-uri.
    #
    # Kernel#open no longer accepts URLs from Ruby 3.0 on, so URI.open is used
    # wherever open-uri provides it; older Rubies fall back to Kernel#open.
    def open_url(url)
      URI.respond_to?(:open) ? URI.open(url, "r") : open(url, "r")
    end

    # Returns a copy of +txt+ in which every U+FFFD replacement character
    # (UTF-8 bytes EF BF BD) is replaced by U+301C WAVE DASH (E3 80 9C).
    # NOTE(review): presumably the page's wave dashes arrive as U+FFFD after
    # decoding -- confirm against the live page.
    def dust_to_tilda(txt)
      txt.gsub("\xEF\xBF\xBD", "\xE3\x80\x9C")
    end
  end
end
@@ -15,7 +15,12 @@
15
15
  # You should have received a copy of the GNU General Public License
16
16
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
17
 
18
- require 'psych'
18
# Load psych if it is installed. When the LoadError mentions psych, print a
# hint and continue; any other LoadError is re-raised as-is so that its
# original message and backtrace are not lost.
begin
  require 'psych'
rescue LoadError => e
  raise unless e.message =~ /psych/
  puts "please install psych first."
end
19
24
  require "yaml"
20
25
  require "csv"
21
26
  require "fileutils"
@@ -55,32 +60,28 @@ module Logaling
55
60
  end
56
61
  glossary
57
62
  end
58
-
59
- def build_path(glossary, source_language, target_language)
60
- fname = [glossary, source_language, target_language].join(".")
61
- File.join(LOGALING_HOME, "projects", glossary, "glossary", "#{fname}.yml")
62
- end
63
63
  end
64
+ attr_reader :glossary, :source_language, :target_language
64
65
 
65
- def initialize(glossary, source_language, target_language)
66
- @path = Glossary.build_path(glossary, source_language, target_language)
66
+ def initialize(glossary, source_language, target_language, logaling_home)
67
+ @logaling_home = logaling_home
67
68
  @glossary = glossary
68
69
  @source_language = source_language
69
70
  @target_language = target_language
70
71
  end
71
72
 
72
73
  def add(source_term, target_term, note)
73
- FileUtils.touch(@path) unless File.exist?(@path)
74
+ FileUtils.touch(source_path) unless File.exist?(source_path)
74
75
 
75
- glossary = Glossary.load_glossary(@path)
76
+ glossary = Glossary.load_glossary(source_path)
76
77
  glossary << build_term(source_term, target_term, note)
77
78
  dump_glossary(glossary)
78
79
  end
79
80
 
80
81
  def update(source_term, target_term, new_target_term, note)
81
- raise GlossaryNotFound unless File.exist?(@path)
82
+ raise GlossaryNotFound unless File.exist?(source_path)
82
83
 
83
- glossary = Glossary.load_glossary(@path)
84
+ glossary = Glossary.load_glossary(source_path)
84
85
 
85
86
  target_index = find_term_index(glossary, source_term, target_term)
86
87
  if target_index
@@ -92,9 +93,9 @@ module Logaling
92
93
  end
93
94
 
94
95
  def delete(source_term, target_term)
95
- raise GlossaryNotFound unless File.exist?(@path)
96
+ raise GlossaryNotFound unless File.exist?(source_path)
96
97
 
97
- glossary = Glossary.load_glossary(@path)
98
+ glossary = Glossary.load_glossary(source_path)
98
99
  target_index = find_term_index(glossary, source_term, target_term)
99
100
  unless target_index
100
101
  raise TermError, "Can't found term '#{source_term} #{target_term}' in '#{@glossary}'" unless target_index
@@ -105,9 +106,9 @@ module Logaling
105
106
  end
106
107
 
107
108
  def delete_all(source_term, force=false)
108
- raise GlossaryNotFound unless File.exist?(@path)
109
+ raise GlossaryNotFound unless File.exist?(source_path)
109
110
 
110
- glossary = Glossary.load_glossary(@path)
111
+ glossary = Glossary.load_glossary(source_path)
111
112
  delete_candidates = target_terms(glossary, source_term)
112
113
  if delete_candidates.empty?
113
114
  raise TermError, "Can't found term '#{source_term} in '#{@glossary}'"
@@ -123,6 +124,15 @@ module Logaling
123
124
  end
124
125
  end
125
126
 
127
# Path of the YAML file holding this glossary's terms:
#   <logaling_home>/projects/<glossary>/glossary/<glossary>.<source>.<target>.yml
#
# The path is computed on first use and cached in @source_path, so later
# calls return the same string.
def source_path
  @source_path ||= begin
    fname = [@glossary, @source_language, @target_language].join(".")
    File.join(@logaling_home, "projects", @glossary, "glossary", "#{fname}.yml")
  end
end
135
+
126
136
  private
127
137
  def build_term(source_term, target_term, note)
128
138
  note ||= ''
@@ -152,7 +162,7 @@ module Logaling
152
162
  end
153
163
 
154
164
  def dump_glossary(glossary)
155
- File.open(@path, "w") do |f|
165
+ File.open(source_path, "w") do |f|
156
166
  f.puts(glossary.to_yaml)
157
167
  end
158
168
  end
@@ -81,18 +81,22 @@ module Logaling
81
81
  end
82
82
  end
83
83
 
84
- def lookup(source_term, source_language, target_language, glossary)
84
+ def lookup(source_term, glossary_source=nil)
85
85
  records_selected = Groonga["translations"].select do |record|
86
86
  conditions = [record.source_term =~ source_term]
87
- conditions << (record.source_language =~ source_language) if source_language
88
- conditions << (record.target_language =~ target_language) if target_language
87
+ if glossary_source
88
+ conditions << (record.source_language =~ glossary_source.source_language) if glossary_source.source_language
89
+ conditions << (record.target_language =~ glossary_source.target_language) if glossary_source.target_language
90
+ end
89
91
  conditions
90
92
  end
91
- specified_glossary = records_selected.select do |record|
92
- record.glossary == glossary
93
- end
94
- specified_glossary.each do |record|
95
- record.key._score += 10
93
+ if glossary_source
94
+ specified_glossary = records_selected.select do |record|
95
+ record.glossary == glossary_source.glossary
96
+ end
97
+ specified_glossary.each do |record|
98
+ record.key._score += 10
99
+ end
96
100
  end
97
101
  records = records_selected.sort([
98
102
  {:key=>"_score", :order=>'descending'},
@@ -104,27 +108,55 @@ module Logaling
104
108
  :html_escape => true,
105
109
  :normalize => true}
106
110
  snippet = records_selected.expression.snippet(["<snippet>", "</snippet>"], options)
111
+ struct_result(records, snippet)
112
+ ensure
113
+ snippet.close if snippet
114
+ records_selected.expression.close if records_selected
115
+ specified_glossary.expression.close if specified_glossary
116
+ end
107
117
 
108
- snipped_source_term = []
109
- records.map do |record|
110
- term = record.key
111
- snipped_text = snippet.execute(term.source_term).join
112
- {:glossary_name => term.glossary.key,
113
- :source_language => term.source_language,
114
- :target_language => term.target_language,
115
- :source_term => term.source_term,
116
- :snipped_source_term => struct_snipped_text(snipped_text),
117
- :target_term => term.target_term,
118
- :note => term.note || ''}
118
+
119
# Dictionary-style search: looks for +search_word+ in both the source and the
# target terms of the "translations" table.
#
# Source-term matching goes through match_target with source_term multiplied
# by 2, and records whose source term equals +search_word+ get 10 added to
# their score. Target-term matches are merged in with union!, and the merged
# set is sorted by score (descending), then source term and target term
# (ascending).
#
# Returns the array of hashes produced by struct_result, built with a snippet
# object that wraps hits in <snippet>...</snippet>.
def lookup_dictionary(search_word)
  records_selected_source = Groonga["translations"].select do |record|
    target = record.match_target do |match_record|
      match_record.source_term * 2
    end
    target =~ search_word
  end
  completely_match = records_selected_source.select do |record|
    record.source_term == search_word
  end
  completely_match.each do |record|
    record.key._score += 10
  end

  records_selected_target = Groonga["translations"].select do |record|
    record.target_term =~ search_word
  end

  records_selected = records_selected_target.union!(records_selected_source)
  records = records_selected.sort([
    {:key=>"_score", :order=>'descending'},
    {:key=>"source_term", :order=>'ascending'},
    {:key=>"target_term", :order=>'ascending'}])

  options = {:width => 100,
             :html_escape => true,
             :normalize => true}
  snippet = records_selected.expression.snippet(["<snippet>", "</snippet>"], options)

  struct_result(records, snippet)
ensure
  snippet.close if snippet
  # Close the expression of every select made above, not only the merged one.
  # When the method fails before union!, records_selected is still nil, so the
  # target selection stands in for it. Each expression is closed once.
  merged = records_selected || records_selected_target
  [merged, records_selected_source, completely_match].compact.each do |result_set|
    result_set.expression.close
  end
end
121
153
 
122
- def translation_list(glossary, source_language, target_language)
154
+ def translation_list(glossary_source)
123
155
  records_raw = Groonga["translations"].select do |record|
124
156
  [
125
- record.glossary == glossary,
126
- record.source_language == source_language,
127
- record.target_language == target_language
157
+ record.glossary == glossary_source.glossary,
158
+ record.source_language == glossary_source.source_language,
159
+ record.target_language == glossary_source.target_language
128
160
  ]
129
161
  end
130
162
 
@@ -132,16 +164,9 @@ module Logaling
132
164
  {:key=>"source_term", :order=>'ascending'},
133
165
  {:key=>"target_term", :order=>'ascending'}])
134
166
 
135
- records.map do |record|
136
- term = record.key
137
-
138
- {:glossary_name => term.glossary.key,
139
- :source_language => term.source_language,
140
- :target_language => term.target_language,
141
- :source_term => term.source_term,
142
- :target_term => term.target_term,
143
- :note => term.note || ''}
144
- end
167
+ struct_result(records)
168
+ ensure
169
+ records_raw.expression.close
145
170
  end
146
171
 
147
172
  def get_bilingual_pair(source_term, target_term, glossary)
@@ -153,16 +178,9 @@ module Logaling
153
178
  ]
154
179
  end
155
180
 
156
- records.map do |record|
157
- term = record.key
158
-
159
- {:glossary_name => term.glossary,
160
- :source_language => term.source_language,
161
- :target_language => term.target_language,
162
- :source_term => term.source_term,
163
- :target_term => term.target_term,
164
- :note => term.note || ''}
165
- end
181
+ struct_result(records)
182
+ ensure
183
+ records.expression.close
166
184
  end
167
185
 
168
186
  def get_bilingual_pair_with_note(source_term, target_term, note, glossary)
@@ -175,16 +193,9 @@ module Logaling
175
193
  ]
176
194
  end
177
195
 
178
- records.map do |record|
179
- term = record.key
180
-
181
- {:glossary_name => term.glossary,
182
- :source_language => term.source_language,
183
- :target_language => term.target_language,
184
- :source_term => term.source_term,
185
- :target_term => term.target_term,
186
- :note => term.note || ''}
187
- end
196
+ struct_result(records)
197
+ ensure
198
+ records.expression.close
188
199
  end
189
200
 
190
201
  def glossary_source_exist?(glossary_source, indexed_at)
@@ -195,6 +206,8 @@ module Logaling
195
206
  ]
196
207
  end
197
208
  !glossary.size.zero?
209
+ ensure
210
+ glossary.expression.close
198
211
  end
199
212
 
200
213
  def get_all_glossary_source
@@ -218,6 +231,8 @@ module Logaling
218
231
  records.each do |record|
219
232
  record.key.delete
220
233
  end
234
+ ensure
235
+ records.expression.close
221
236
  end
222
237
 
223
238
  def add_glossary_source(glossary_source, indexed_at)
@@ -232,6 +247,8 @@ module Logaling
232
247
  records.each do |record|
233
248
  record.key.delete
234
249
  end
250
+ ensure
251
+ records.expression.close
235
252
  end
236
253
 
237
254
  def add_glossary(glossary_name)
@@ -246,6 +263,8 @@ module Logaling
246
263
  records.each do |record|
247
264
  record.key.delete
248
265
  end
266
+ ensure
267
+ records.expression.close
249
268
  end
250
269
 
251
270
  def add_translation(glossary_name, glossary_source, source_language, target_language, source_term, target_term, note)
@@ -302,6 +321,7 @@ module Logaling
302
321
  :key_normalize => true,
303
322
  :default_tokenizer => "TokenBigram") do |table|
304
323
  table.index("translations.source_term")
324
+ table.index("translations.target_term")
305
325
  end
306
326
  end
307
327
  end
@@ -320,7 +340,24 @@ module Logaling
320
340
  @database.nil? or @database.closed?
321
341
  end
322
342
 
343
# Converts selected records into plain hashes, one per record.
#
# records:: enumerable of records; each record's #key is the translation entry
# snippet:: optional snippet object; when given, the source and target terms
#           are also passed through snip_source_term / snip_target_term,
#           otherwise both snipped fields are empty arrays
#
# Each hash has the keys :glossary_name, :source_language, :target_language,
# :source_term, :snipped_source_term, :target_term, :snipped_target_term and
# :note (an empty string when the entry has no note).
def struct_result(records, snippet=nil)
  records.map do |record|
    entry = record.key
    if snippet
      highlighted_source = snip_source_term(entry, snippet)
      highlighted_target = snip_target_term(entry, snippet)
    else
      highlighted_source = []
      highlighted_target = []
    end

    result = {}
    result[:glossary_name] = entry.glossary.key
    result[:source_language] = entry.source_language
    result[:target_language] = entry.target_language
    result[:source_term] = entry.source_term
    result[:snipped_source_term] = highlighted_source
    result[:target_term] = entry.target_term
    result[:snipped_target_term] = highlighted_target
    result[:note] = entry.note || ''
    result
  end
end
358
+
323
359
  def struct_snipped_text(snipped_text)
360
+ return [] if snipped_text.empty?
324
361
  word_list = snipped_text.split(/(<snippet>[^<]*<\/snippet>)/)
325
362
  structed_source_term = word_list.map{|word|
326
363
  replaced_word = word.sub(/<snippet>([^<]*)<\/snippet>/){|match| $1}
@@ -333,6 +370,16 @@ module Logaling
333
370
  structed_source_term
334
371
  end
335
372
 
373
# Runs +snippet+ over the term's source text, joins the returned pieces into
# one string and hands it to struct_snipped_text; that method's result is
# returned.
def snip_source_term(term, snippet)
  fragments = snippet.execute(term.source_term)
  struct_snipped_text(fragments.join)
end
377
+
378
# Runs +snippet+ over the term's target text, joins the returned pieces into
# one string and hands it to struct_snipped_text; that method's result is
# returned.
def snip_target_term(term, snippet)
  fragments = snippet.execute(term.target_term)
  struct_snipped_text(fragments.join)
end
382
+
336
383
  def get_config(conf_key)
337
384
  records = Groonga["configurations"].select do |record|
338
385
  record.conf_key == conf_key
@@ -342,6 +389,8 @@ module Logaling
342
389
  config.conf_value
343
390
  end
344
391
  value.size > 0 ? value[0] : ""
392
+ ensure
393
+ records.expression.close
345
394
  end
346
395
 
347
396
  def add_config(conf_key, conf_value)
@@ -50,22 +50,26 @@ module Logaling
50
50
  end
51
51
  end
52
52
 
53
- def lookup(source_term, source_language, target_language, glossary)
53
+ def lookup(source_term, glossary_source, dictionary=false)
54
54
  raise GlossaryDBNotFound unless File.exist?(logaling_db_home)
55
55
 
56
56
  terms = []
57
57
  Logaling::GlossaryDB.open(logaling_db_home, "utf8") do |db|
58
- terms = db.lookup(source_term, source_language, target_language, glossary)
58
+ if dictionary
59
+ terms = db.lookup_dictionary(source_term)
60
+ else
61
+ terms = db.lookup(source_term, glossary_source)
62
+ end
59
63
  end
60
64
  terms
61
65
  end
62
66
 
63
- def show_glossary(glossary, source_language, target_language)
67
+ def show_glossary(glossary_source)
64
68
  raise GlossaryDBNotFound unless File.exist?(logaling_db_home)
65
69
 
66
70
  terms = []
67
71
  Logaling::GlossaryDB.open(logaling_db_home, "utf8") do |db|
68
- terms = db.translation_list(glossary, source_language, target_language)
72
+ terms = db.translation_list(glossary_source)
69
73
  end
70
74
  terms
71
75
  end
@@ -1,4 +1,4 @@
1
- # -*- encoding: utf-8 -*-
1
+ # -*- coding: utf-8 -*-
2
2
  #
3
3
  # Copyright (C) 2011 Miho SUZUKI
4
4
  #
@@ -16,7 +16,7 @@
16
16
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
17
 
18
18
  $:.push File.expand_path("../lib", __FILE__)
19
- require "logaling/command"
19
+ require "logaling/command/version"
20
20
 
21
21
  Gem::Specification.new do |s|
22
22
  s.name = "logaling-command"
@@ -41,7 +41,7 @@ Gem::Specification.new do |s|
41
41
  s.add_runtime_dependency 'rroonga', ['>= 1.3.0']
42
42
  s.add_runtime_dependency 'rainbow'
43
43
  s.add_runtime_dependency 'nokogiri'
44
- s.add_runtime_dependency 'active_support'
44
+ s.add_runtime_dependency 'activesupport'
45
45
 
46
46
  s.add_development_dependency 'rake'
47
47
  s.add_development_dependency 'rspec'