dict 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile.lock +1 -1
- data/lib/dict/dict.rb +1 -1
- data/lib/dict/dictionary.rb +2 -0
- data/lib/dict/result.rb +35 -0
- data/lib/dict/version.rb +1 -1
- data/lib/dict/wiktionary.rb +47 -13
- data/spec/dict/lib_dict_cli_runner_spec.rb +1 -1
- data/spec/dict/lib_dict_spec.rb +13 -16
- data/spec/dict/lib_wiktionary_spec.rb +12 -16
- data/spec/dict/spec/dict/vcr_cassettes/examples_krowa_cassette.yml +1040 -0
- data/spec/dict/spec/dict/vcr_cassettes/examples_samochod_cassette.yml +1089 -0
- data/spec/dict/spec/dict/vcr_cassettes/paired_value_samochod_cassette.yml +1089 -0
- data/spec/dict/spec/dict/vcr_cassettes/translations_krowa_cassette.yml +1040 -0
- data/spec/dict/spec/dict/vcr_cassettes/translations_samochod_cassette.yml +1089 -0
- metadata +8 -2
data/Gemfile.lock
CHANGED
data/lib/dict/dict.rb
CHANGED
@@ -29,7 +29,7 @@ module Dict
|
|
29
29
|
def get_single_dictionary_translations(word, dictionary)
|
30
30
|
case dictionary
|
31
31
|
when 'wiktionary'
|
32
|
-
Wiktionary.new(word).translate
|
32
|
+
Wiktionary.new(word).translate.translations
|
33
33
|
when 'dictpl'
|
34
34
|
Dictpl.new(word).translate
|
35
35
|
else Dictionary.message
|
data/lib/dict/dictionary.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
|
3
3
|
require 'open-uri'
|
4
|
+
require_relative 'result'
|
4
5
|
|
5
6
|
module Dict
|
6
7
|
class Dictionary
|
@@ -11,6 +12,7 @@ module Dict
|
|
11
12
|
@translations = []
|
12
13
|
@examples = []
|
13
14
|
@word = word
|
15
|
+
@result = Dict::Result.new(@word)
|
14
16
|
end
|
15
17
|
|
16
18
|
# returns hash with structure as shown below
|
data/lib/dict/result.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
module Dict
  # Accumulates what a dictionary lookup found for one term.
  #
  # Translations and examples are kept in two separate hashes, each mapping
  # a term to an Array of the values added for it, in insertion order.
  class Result
    attr_reader :term, :translations, :examples

    # term - the word this result was created for.
    def initialize(term)
      @term = term
      @translations = {}
      @examples = {}
    end

    # Appends +translation+ to the list stored under +term+.
    # Returns self, so calls can be chained.
    def add_translation(term, translation)
      add_result(@translations, term, translation)
    end

    # Appends +example+ to the list stored under +term+.
    # Returns self, so calls can be chained.
    def add_example(term, example)
      add_result(@examples, term, example)
    end

    # Yields every (term, translations-array) pair.
    # Without a block an Enumerator over the same pairs is returned instead
    # of raising LocalJumpError.
    def each_translation
      return enum_for(:each_translation) unless block_given?

      @translations.each_pair do |term, translation|
        yield term, translation
      end
    end

    private

    # Pushes +value+ onto the array stored at hash[key], creating the array
    # on first use. Returns self.
    def add_result(hash, key, value)
      (hash[key] ||= []) << value
      self
    end
  end
end
|
data/lib/dict/version.rb
CHANGED
data/lib/dict/wiktionary.rb
CHANGED
@@ -1,28 +1,62 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
|
3
3
|
require 'nokogiri'
|
4
|
-
|
4
|
+
require_relative 'dictionary'
|
5
5
|
|
6
6
|
WIKI_URL = 'http://en.wiktionary.org/wiki/'
|
7
7
|
|
8
8
|
module Dict
|
9
9
|
class Wiktionary < Dictionary
|
10
|
-
#
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
10
|
+
# Return a Dict::Result object
|
11
|
+
def translate
  # Looks @word up on the English Wiktionary first; if that page has a
  # "==Polish==" section it is used, otherwise the Polish Wiktionary page is
  # parsed instead. Translations are collected into @result, which is
  # returned. get_html raises Dictionary::ConnectError on HTTP errors.
  en_wiki_url = "http://en.wiktionary.org/w/index.php?title=#{@word}&action=edit"
  pl_wiki_url = "http://pl.wiktionary.org/w/index.php?title=#{@word}&action=edit"

  # Fetch the page once and reuse the node for both the language check and
  # the extraction.
  content = get_html(en_wiki_url).css('textarea#wpTextbox1').first
  if polish?(content)
    get_translations(content)
  else
    textarea = get_html(pl_wiki_url).css('textarea#wpTextbox1').first
    # A page without the edit textarea yields an empty result instead of a
    # NoMethodError on nil.
    get_translations(textarea.content, false) if textarea
  end

  @result
end
|
24
|
+
|
22
25
|
# Downloads +url+ (after URI-encoding it) and returns the parsed
# Nokogiri HTML document. An OpenURI::HTTPError is translated into
# Dictionary::ConnectError.
def get_html(url)
  begin
    page = open(URI.encode(url))
    Nokogiri::HTML(page)
  rescue OpenURI::HTTPError
    raise Dictionary::ConnectError
  end
end
|
30
|
+
|
31
|
+
private
|
32
|
+
# True when +content+ contains a "==Polish==" heading (case-insensitive),
# false otherwise (including for nil).
def polish?(content)
  /==Polish==/i.match(content) ? true : false
end
|
36
|
+
|
37
|
+
# Extracts translations from wiki source text and records each one in
# @result under @result.term.
#
# content - the page source to scan.
# polish  - when true (default) +content+ is parsed with the pattern for a
#           Polish noun entry ("Noun" heading, {{head|pl or {{pl-noun
#           template, first "# [[...]]" definition line); when false it is
#           parsed as a page with an "angielski" section whose
#           {{znaczenia}} block lists numbered meanings.
#
# Returns the Array of extracted translations, or nil when the non-Polish
# layout is not found in +content+.
def get_translations(content, polish = true)
  translations =
    if polish
      found = /Noun[^\{]+\{\{(?:head\|pl|pl\-noun)[^#]+#\s*\[\[([^\n]+)/.match(content)
      # Capture looks like "car]], [[automobile]]": drop brackets, then split.
      found ? found[1].gsub(/\[|\]/, '').split(', ') : []
    else
      block = /angielski(?:.|\n)+\{\{znaczenia\}\}(.|\n)+(?:\{\{odmiana){1,}/.match(content)
      return unless block

      # Cut everything from the inflection ("odmiana") part onwards so only
      # the meanings remain.
      meanings = block[0].gsub(/odmiana(.|\n)+$/, '')
      meanings.scan(/:\s*\(\d\.?\d?\)\s*([^\n]+)/)
              .map { |captures| captures.first.gsub(/\[|\]|\{\{[^\}]+\}\}|'/, '').strip }
              .reject(&:empty?)
    end

  translations.each { |item| @result.add_translation(@result.term, item) }
end
|
57
|
+
|
58
|
+
# Placeholder for example extraction: not implemented yet, so both
# arguments are ignored and nil is returned.
def get_examples(content, polish = true)
  # TODO: implement
  nil
end
|
27
61
|
end
|
28
62
|
end
|
@@ -37,7 +37,7 @@ describe "get_translations" do
|
|
37
37
|
stub_const("ARGV", ["słowik"])
|
38
38
|
runner = Dict::CLI::Runner.new
|
39
39
|
opts = runner.parse_parameters
|
40
|
-
runner.get_translations(opts, "słowik").should == {"wiktionary"=>{}, "dictpl"=>{"słowik"=>["nightingale"], "słowik białobrewy; Luscinia indicus; Tarsiger indicus (gatunek ptaka)"=>["white-browed bush-robin"], "słowik białosterny; Luscinia pectoralis (gatunek ptaka)"=>["Himalayan rubythroat", "white-tailed rubythroat"], "słowik chiński; pekińczyk żółty; Leiothrix lutea"=>["Pekin robin", "red-billed leiothrix"], "słowik chiński; pekińczyk żółty; pekińczyk koralodzioby; Leiothrix lutea"=>["Peking robin"], "słowik czarnogardły; Luscinia obscura"=>["black-throated blue robin"], "słowik himalajski; Luscinia brunnea (gatunek ptaka)"=>["Indian blue chat", "Indian blue robin"], "słowik modry; Luscinia cyane"=>["Siberian blue robin"], "słowik obrożny; Luscinia johnstoniae; Tarsiger johnstoniae (gatunek ptaka)"=>["collared bush-robin"]}}
|
40
|
+
runner.get_translations(opts, "słowik").should == {"wiktionary"=>{"słowik"=>["nightingale"]}, "dictpl"=>{"słowik"=>["nightingale"], "słowik białobrewy; Luscinia indicus; Tarsiger indicus (gatunek ptaka)"=>["white-browed bush-robin"], "słowik białosterny; Luscinia pectoralis (gatunek ptaka)"=>["Himalayan rubythroat", "white-tailed rubythroat"], "słowik chiński; pekińczyk żółty; Leiothrix lutea"=>["Pekin robin", "red-billed leiothrix"], "słowik chiński; pekińczyk żółty; pekińczyk koralodzioby; Leiothrix lutea"=>["Peking robin"], "słowik czarnogardły; Luscinia obscura"=>["black-throated blue robin"], "słowik himalajski; Luscinia brunnea (gatunek ptaka)"=>["Indian blue chat", "Indian blue robin"], "słowik modry; Luscinia cyane"=>["Siberian blue robin"], "słowik obrożny; Luscinia johnstoniae; Tarsiger johnstoniae (gatunek ptaka)"=>["collared bush-robin"]}}
|
41
41
|
end
|
42
42
|
|
43
43
|
it "should return results from selected dictionary for word 'słowik'" do
|
data/spec/dict/lib_dict_spec.rb
CHANGED
@@ -4,24 +4,21 @@ require 'dict/dict'
|
|
4
4
|
|
5
5
|
describe Dict do
|
6
6
|
|
7
|
-
it "should get single translation from dictionary with two arguments given" do
|
8
|
-
expect{
|
9
|
-
Dict.get_single_dictionary_translations('samochód', 'dictpl')
|
10
|
-
}.to_not raise_error
|
11
|
-
end
|
12
|
-
|
13
|
-
it "should return hash with translations from all dictionaries" do
|
14
|
-
Dict.get_all_dictionaries_translations('samochód').should be_a(Hash)
|
15
|
-
end
|
16
|
-
|
17
7
|
it "should return array of available services which is not empty" do
|
18
8
|
arr = Dict.available_dictionaries
|
19
|
-
|
20
|
-
|
9
|
+
arr.should be_a(Array)
|
10
|
+
arr.size.should_not == 0
|
21
11
|
end
|
22
12
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
13
|
+
it "should return array of available services, which contains wiktionary and dictpl" do
|
14
|
+
Dict.available_dictionaries.should == ['wiktionary', 'dictpl']
|
15
|
+
end
|
16
|
+
|
17
|
+
|
18
|
+
|
19
|
+
it "should return whatever Dictpl returns embedded in a hash" do
|
20
|
+
dictpl = stub(:translate => 'DICTPL_RESULTS')
|
21
|
+
Dict::Dictpl.should_receive(:new).with('WORD').and_return(dictpl)
|
22
|
+
Dict.get_single_dictionary_translations('WORD', 'dictpl').should == 'DICTPL_RESULTS'
|
23
|
+
end
|
27
24
|
end
|
@@ -9,26 +9,22 @@ describe Dict::Wiktionary do
|
|
9
9
|
end
|
10
10
|
|
11
11
|
it "should return an two element array of translations of word samochód containing [\"car\",\"automobile\"]" do
|
12
|
-
w = Dict::Wiktionary.new('samochód')
|
13
|
-
w.
|
14
|
-
w.translations.should == ["car", "automobile"]
|
12
|
+
w = Dict::Wiktionary.new('samochód').translate
|
13
|
+
w.translations.should == {"samochód"=>["car", "automobile"]}
|
15
14
|
end
|
16
15
|
|
17
|
-
it "should return
|
18
|
-
w = Dict::Wiktionary.new('samochód')
|
19
|
-
w.
|
20
|
-
w.translations.should be_a(Array)
|
16
|
+
it "should return a hash with translations" do
|
17
|
+
w = Dict::Wiktionary.new('samochód').translate
|
18
|
+
w.translations.should be_a(Hash)
|
21
19
|
end
|
22
20
|
|
23
|
-
it "should return
|
24
|
-
w = Dict::Wiktionary.new('samochód')
|
25
|
-
w.
|
26
|
-
w.examples.should be_a(Array)
|
21
|
+
it "should return a Resut object" do
|
22
|
+
w = Dict::Wiktionary.new('samochód').translate
|
23
|
+
w.should be_a(Dict::Result)
|
27
24
|
end
|
28
|
-
|
29
|
-
it "should return
|
30
|
-
|
31
|
-
|
25
|
+
|
26
|
+
it "should return translation from english to polish for word 'field'" do
|
27
|
+
result = Dict::Wiktionary.new('field').translate.translations
|
28
|
+
result.should eq({"field"=>["pole", "pole (magnetyczne, elektryczne, sił, itp.)", "pole (skalarne, wektorowe, itp.)", "ciało (liczb rzeczywistych, zespolonych, itp.)", "wystawić (drużynę)", "odpowiadać (na pytania)", "polowy", "polny"]})
|
32
29
|
end
|
33
|
-
|
34
30
|
end
|