dict 0.2.2 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +1 -1
- data/lib/dict/dict.rb +1 -1
- data/lib/dict/dictionary.rb +2 -0
- data/lib/dict/result.rb +35 -0
- data/lib/dict/version.rb +1 -1
- data/lib/dict/wiktionary.rb +47 -13
- data/spec/dict/lib_dict_cli_runner_spec.rb +1 -1
- data/spec/dict/lib_dict_spec.rb +13 -16
- data/spec/dict/lib_wiktionary_spec.rb +12 -16
- data/spec/dict/spec/dict/vcr_cassettes/examples_krowa_cassette.yml +1040 -0
- data/spec/dict/spec/dict/vcr_cassettes/examples_samochod_cassette.yml +1089 -0
- data/spec/dict/spec/dict/vcr_cassettes/paired_value_samochod_cassette.yml +1089 -0
- data/spec/dict/spec/dict/vcr_cassettes/translations_krowa_cassette.yml +1040 -0
- data/spec/dict/spec/dict/vcr_cassettes/translations_samochod_cassette.yml +1089 -0
- metadata +8 -2
data/Gemfile.lock
CHANGED
data/lib/dict/dict.rb
CHANGED
@@ -29,7 +29,7 @@ module Dict
|
|
29
29
|
def get_single_dictionary_translations(word, dictionary)
|
30
30
|
case dictionary
|
31
31
|
when 'wiktionary'
|
32
|
-
Wiktionary.new(word).translate
|
32
|
+
Wiktionary.new(word).translate.translations
|
33
33
|
when 'dictpl'
|
34
34
|
Dictpl.new(word).translate
|
35
35
|
else Dictionary.message
|
data/lib/dict/dictionary.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
|
3
3
|
require 'open-uri'
|
4
|
+
require_relative 'result'
|
4
5
|
|
5
6
|
module Dict
|
6
7
|
class Dictionary
|
@@ -11,6 +12,7 @@ module Dict
|
|
11
12
|
@translations = []
|
12
13
|
@examples = []
|
13
14
|
@word = word
|
15
|
+
@result = Dict::Result.new(@word)
|
14
16
|
end
|
15
17
|
|
16
18
|
# returns a hash with the structure shown below
|
data/lib/dict/result.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
module Dict
|
2
|
+
class Result
  # Value object collecting what a dictionary lookup found for one term.
  #
  # +translations+ and +examples+ are hashes mapping a term to an Array of
  # the values added for it, in insertion order.
  attr_reader :term, :translations, :examples

  # term - the word this result was built for.
  def initialize(term)
    @term = term
    @translations = {}
    @examples = {}
  end

  # Records +translation+ under +term+.
  # Returns self, so calls can be chained.
  def add_translation(term, translation)
    add_result(@translations, term, translation)
  end

  # Records +example+ under +term+.
  # Returns self, so calls can be chained.
  def add_example(term, example)
    add_result(@examples, term, example)
  end

  # Yields each term together with the Array of its translations.
  #
  # Without a block an Enumerator is returned instead of raising
  # LocalJumpError, matching the behaviour of Ruby's own iterators.
  def each_translation
    return enum_for(:each_translation) unless block_given?

    @translations.each_pair do |term, translation|
      yield term, translation
    end
  end

  private

  # Appends +value+ to the list stored under +key+ in +hash+, creating the
  # list on first use. Returns self.
  def add_result(hash, key, value)
    (hash[key] ||= []) << value
    self
  end
end
|
35
|
+
end
|
data/lib/dict/version.rb
CHANGED
data/lib/dict/wiktionary.rb
CHANGED
@@ -1,28 +1,62 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
|
3
3
|
require 'nokogiri'
|
4
|
-
|
4
|
+
require_relative 'dictionary'
|
5
5
|
|
6
6
|
WIKI_URL = 'http://en.wiktionary.org/wiki/'
|
7
7
|
|
8
8
|
module Dict
|
9
9
|
class Wiktionary < Dictionary
  # Looks @word up on Wiktionary and stores the translations in @result.
  #
  # The English Wiktionary edit page is fetched first. If its wikitext has a
  # "==Polish==" heading the word is parsed as a Polish entry; otherwise the
  # Polish Wiktionary edit page is fetched and parsed instead.
  #
  # Returns the Dict::Result object held in @result.
  # Raises Dictionary::ConnectError when a page request fails (see get_html).
  def translate
    en_wiki_url = "http://en.wiktionary.org/w/index.php?title=#{@word}&action=edit"
    pl_wiki_url = "http://pl.wiktionary.org/w/index.php?title=#{@word}&action=edit"

    # Download the English page once and reuse its text for both the
    # language check and the parsing step.
    en_wikitext = fetch_wikitext(en_wiki_url)
    if polish?(en_wikitext)
      get_translations(en_wikitext)
    else
      get_translations(fetch_wikitext(pl_wiki_url), false)
    end

    @result
  end

  # Downloads +url+ and returns it parsed by Nokogiri::HTML.
  #
  # Raises Dictionary::ConnectError when the request ends with an
  # OpenURI::HTTPError.
  #
  # NOTE(review): URI.encode and Kernel#open on URLs are gone in Ruby 3.0;
  # revisit this call if newer Rubies have to be supported.
  def get_html(url)
    Nokogiri::HTML(open(URI.encode(url)))
  rescue OpenURI::HTTPError
    raise Dictionary::ConnectError
  end

  private

  # Returns the text of the page's edit box (textarea#wpTextbox1), or an
  # empty string when the page has no such element, so callers never have to
  # deal with nil.
  def fetch_wikitext(url)
    textarea = get_html(url).css('textarea#wpTextbox1').first
    textarea ? textarea.content : ''
  end

  # True when +content+ contains a "==Polish==" heading (case-insensitive).
  def polish?(content)
    !(/==Polish==/i =~ content).nil?
  end

  # Extracts translations from +content+ and adds each of them to @result
  # under @result.term.
  #
  # content - wikitext of the entry.
  # polish  - true to parse an English Wiktionary entry of a Polish word,
  #           false to parse a Polish Wiktionary entry of an English word.
  def get_translations(content, polish = true)
    translations = polish ? polish_entry_translations(content) : english_entry_translations(content)
    translations.each { |item| @result.add_translation(@result.term, item) }
  end

  # Translations listed on the first definition line of the Polish noun
  # section; an empty Array when the section is not found.
  def polish_entry_translations(content)
    found = /Noun[^\{]+\{\{(?:head\|pl|pl\-noun)[^#]+#\s*\[\[([^\n]+)/.match(content)
    return [] unless found

    found[1].gsub(/\[|\]/, '').split(', ')
  end

  # Numbered meanings from the "angielski" section of a Polish Wiktionary
  # entry, stripped of wiki markup; an empty Array when the section is not
  # found.
  def english_entry_translations(content)
    section = /angielski(?:.|\n)+\{\{znaczenia\}\}(.|\n)+(?:\{\{odmiana){1,}/.match(content)
    return [] unless section

    # Drop everything from the inflection ("odmiana") template onwards.
    meanings = section[0].gsub(/odmiana(.|\n)+$/, '')
    cleaned = meanings.scan(/:\s*\(\d\.?\d?\)\s*([^\n]+)/).map do |captures|
      captures[0].gsub(/\[|\]|\{\{[^\}]+\}\}|'/, '').strip
    end
    cleaned.reject { |item| item.empty? }
  end

  def get_examples(content, polish = true)
    # TODO: not implemented yet
  end
end
|
28
62
|
end
|
@@ -37,7 +37,7 @@ describe "get_translations" do
|
|
37
37
|
stub_const("ARGV", ["słowik"])
|
38
38
|
runner = Dict::CLI::Runner.new
|
39
39
|
opts = runner.parse_parameters
|
40
|
-
runner.get_translations(opts, "słowik").should == {"wiktionary"=>{}, "dictpl"=>{"słowik"=>["nightingale"], "słowik białobrewy; Luscinia indicus; Tarsiger indicus (gatunek ptaka)"=>["white-browed bush-robin"], "słowik białosterny; Luscinia pectoralis (gatunek ptaka)"=>["Himalayan rubythroat", "white-tailed rubythroat"], "słowik chiński; pekińczyk żółty; Leiothrix lutea"=>["Pekin robin", "red-billed leiothrix"], "słowik chiński; pekińczyk żółty; pekińczyk koralodzioby; Leiothrix lutea"=>["Peking robin"], "słowik czarnogardły; Luscinia obscura"=>["black-throated blue robin"], "słowik himalajski; Luscinia brunnea (gatunek ptaka)"=>["Indian blue chat", "Indian blue robin"], "słowik modry; Luscinia cyane"=>["Siberian blue robin"], "słowik obrożny; Luscinia johnstoniae; Tarsiger johnstoniae (gatunek ptaka)"=>["collared bush-robin"]}}
|
40
|
+
runner.get_translations(opts, "słowik").should == {"wiktionary"=>{"słowik"=>["nightingale"]}, "dictpl"=>{"słowik"=>["nightingale"], "słowik białobrewy; Luscinia indicus; Tarsiger indicus (gatunek ptaka)"=>["white-browed bush-robin"], "słowik białosterny; Luscinia pectoralis (gatunek ptaka)"=>["Himalayan rubythroat", "white-tailed rubythroat"], "słowik chiński; pekińczyk żółty; Leiothrix lutea"=>["Pekin robin", "red-billed leiothrix"], "słowik chiński; pekińczyk żółty; pekińczyk koralodzioby; Leiothrix lutea"=>["Peking robin"], "słowik czarnogardły; Luscinia obscura"=>["black-throated blue robin"], "słowik himalajski; Luscinia brunnea (gatunek ptaka)"=>["Indian blue chat", "Indian blue robin"], "słowik modry; Luscinia cyane"=>["Siberian blue robin"], "słowik obrożny; Luscinia johnstoniae; Tarsiger johnstoniae (gatunek ptaka)"=>["collared bush-robin"]}}
|
41
41
|
end
|
42
42
|
|
43
43
|
it "should return results from selected dictionary for word 'słowik'" do
|
data/spec/dict/lib_dict_spec.rb
CHANGED
@@ -4,24 +4,21 @@ require 'dict/dict'
|
|
4
4
|
|
5
5
|
# Specs for the module-level helpers of Dict.
describe Dict do

  it "should return array of available services which is not empty" do
    arr = Dict.available_dictionaries
    arr.should be_a(Array)
    arr.should_not be_empty
  end

  it "should return array of available services, which contains wiktionary and dictpl" do
    Dict.available_dictionaries.should == ['wiktionary', 'dictpl']
  end

  # The dictionaries are stubbed so these examples never touch the network.
  it "should return whatever Dictpl returns embedded in a hash" do
    dictpl = stub(:translate => 'DICTPL_RESULTS')
    Dict::Dictpl.should_receive(:new).with('WORD').and_return(dictpl)
    Dict.get_single_dictionary_translations('WORD', 'dictpl').should == 'DICTPL_RESULTS'
  end

  # The wiktionary branch returns the translations of the result object
  # that Wiktionary#translate hands back, not the result object itself.
  it "should return the translations of whatever Wiktionary returns" do
    result = stub(:translations => 'WIKTIONARY_RESULTS')
    wiktionary = stub(:translate => result)
    Dict::Wiktionary.should_receive(:new).with('WORD').and_return(wiktionary)
    Dict.get_single_dictionary_translations('WORD', 'wiktionary').should == 'WIKTIONARY_RESULTS'
  end
end
|
@@ -9,26 +9,22 @@ describe Dict::Wiktionary do
|
|
9
9
|
end
|
10
10
|
|
11
11
|
it "should return an two element array of translations of word samochód containing [\"car\",\"automobile\"]" do
|
12
|
-
w = Dict::Wiktionary.new('samochód')
|
13
|
-
w.
|
14
|
-
w.translations.should == ["car", "automobile"]
|
12
|
+
w = Dict::Wiktionary.new('samochód').translate
|
13
|
+
w.translations.should == {"samochód"=>["car", "automobile"]}
|
15
14
|
end
|
16
15
|
|
17
|
-
it "should return
|
18
|
-
w = Dict::Wiktionary.new('samochód')
|
19
|
-
w.
|
20
|
-
w.translations.should be_a(Array)
|
16
|
+
it "should return a hash with translations" do
|
17
|
+
w = Dict::Wiktionary.new('samochód').translate
|
18
|
+
w.translations.should be_a(Hash)
|
21
19
|
end
|
22
20
|
|
23
|
-
it "should return
|
24
|
-
w = Dict::Wiktionary.new('samochód')
|
25
|
-
w.
|
26
|
-
w.examples.should be_a(Array)
|
21
|
+
it "should return a Resut object" do
|
22
|
+
w = Dict::Wiktionary.new('samochód').translate
|
23
|
+
w.should be_a(Dict::Result)
|
27
24
|
end
|
28
|
-
|
29
|
-
it "should return
|
30
|
-
|
31
|
-
|
25
|
+
|
26
|
+
it "should return translation from english to polish for word 'field'" do
|
27
|
+
result = Dict::Wiktionary.new('field').translate.translations
|
28
|
+
result.should eq({"field"=>["pole", "pole (magnetyczne, elektryczne, sił, itp.)", "pole (skalarne, wektorowe, itp.)", "ciało (liczb rzeczywistych, zespolonych, itp.)", "wystawić (drużynę)", "odpowiadać (na pytania)", "polowy", "polny"]})
|
32
29
|
end
|
33
|
-
|
34
30
|
end
|