dict 0.2.3 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +11 -0
- data/lib/dict/version.rb +1 -1
- data/lib/dict/wiktionary.rb +34 -31
- data/spec/dict/lib_dict_cli_runner_spec.rb +14 -8
- data/spec/dict/lib_dict_spec.rb +18 -3
- data/spec/dict/lib_dictpl_spec.rb +5 -5
- data/spec/dict/lib_wiktionary_spec.rb +23 -10
- data/spec/dict/vcr_cassettes/dictpl_examples_krowa_cassette.yml +1040 -0
- data/spec/dict/vcr_cassettes/dictpl_samochod_cassette.yml +1089 -0
- data/spec/dict/vcr_cassettes/dictpl_translations_krowa_cassette.yml +1040 -0
- data/spec/dict/vcr_cassettes/translations_slownik_cassette.yml +3207 -0
- data/spec/dict/vcr_cassettes/wiktionary_translations_field_cassette.yml +8559 -0
- data/spec/dict/vcr_cassettes/wiktionary_translations_samochod_cassette.yml +2571 -0
- metadata +7 -1
data/README.md
CHANGED
@@ -1,2 +1,13 @@
|
|
1
1
|
# dict [![Build Status](https://secure.travis-ci.org/Ragnarson/dict-gem.png?branch=master)](http://travis-ci.org/Ragnarson/dict-gem)
|
2
2
|
CLI and backend for dict - an open source dictionary aggregator.
|
3
|
+
|
4
|
+
## Usage ##
|
5
|
+
To search for `WORD`:
|
6
|
+
|
7
|
+
dict WORD [OPTIONS]
|
8
|
+
|
9
|
+
### Available options ###
|
10
|
+
|
11
|
+
- `-h` or `--help` - displays help message
|
12
|
+
- `-t` or `--time` - used to set timeout in seconds. Default value: 300
|
13
|
+
- `-d` or `--dict` - used to select desired dictionary
|
data/lib/dict/version.rb
CHANGED
data/lib/dict/wiktionary.rb
CHANGED
@@ -7,17 +7,10 @@ WIKI_URL = 'http://en.wiktionary.org/wiki/'
|
|
7
7
|
|
8
8
|
module Dict
|
9
9
|
class Wiktionary < Dictionary
|
10
|
-
|
10
|
+
|
11
|
+
# Returns a Dict::Result object.
|
11
12
|
def translate
|
12
|
-
|
13
|
-
url_en = "http://pl.wiktionary.org/w/index.php?title=#{@word}&action=edit"
|
14
|
-
|
15
|
-
content = get_html(url_pl).css('textarea#wpTextbox1').first
|
16
|
-
if polish?(get_html(url_pl).css('textarea#wpTextbox1').first)
|
17
|
-
get_translations(get_html(url_pl).css('textarea#wpTextbox1').first)
|
18
|
-
else
|
19
|
-
get_translations(get_html(url_en).css('textarea#wpTextbox1').first.content, false)
|
20
|
-
end
|
13
|
+
translations.each { |item| @result.add_translation(@result.term, item) }
|
21
14
|
|
22
15
|
@result
|
23
16
|
end
|
@@ -30,33 +23,43 @@ module Dict
|
|
30
23
|
|
31
24
|
private
|
32
25
|
def polish?(content)
|
33
|
-
|
34
|
-
false
|
26
|
+
! /==Polish==/i.match(content).nil?
|
35
27
|
end
|
36
28
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
29
|
+
# Returns an array containing translations.
|
30
|
+
def translations
|
31
|
+
url_pl = "http://en.wiktionary.org/w/index.php?title=#{@word}&action=edit"
|
32
|
+
url_en = "http://pl.wiktionary.org/w/index.php?title=#{@word}&action=edit"
|
33
|
+
|
34
|
+
content_pl = get_html(url_pl).css('textarea#wpTextbox1').first
|
35
|
+
if polish?(content_pl)
|
36
|
+
extract_polish_translations(content_pl)
|
42
37
|
else
|
43
|
-
|
44
|
-
return unless translations_block.instance_of?(MatchData)
|
45
|
-
translations_block = translations_block[0].gsub(/odmiana(.|\n)+$/,'')
|
46
|
-
translations = translations_block.scan(/:\s*\(\d\.?\d?\)\s*([^\n]+)/)
|
47
|
-
translations.map! do |translation|
|
48
|
-
translation[0].gsub(/\[|\]|\{\{[^\}]+\}\}|'/,'').strip
|
49
|
-
end
|
50
|
-
translations.delete_if do |item|
|
51
|
-
item.empty?
|
52
|
-
end
|
53
|
-
translations ||= []
|
54
|
-
translations.each { |item| @result.add_translation(@result.term, item) }
|
38
|
+
extract_english_translations(get_html(url_en).css('textarea#wpTextbox1').first.content)
|
55
39
|
end
|
56
40
|
end
|
57
41
|
|
58
|
-
|
59
|
-
|
42
|
+
# Returns an array containing Polish translations.
|
43
|
+
def extract_polish_translations(content)
|
44
|
+
translations = /Noun[^\{]+\{\{(?:head\|pl|pl\-noun)[^#]+#\s*\[\[([^\n]+)/.match(content)
|
45
|
+
translations = (translations && translations[1].gsub(/\[|\]/,'').split(', ')) || []
|
46
|
+
end
|
47
|
+
|
48
|
+
# Returns an array containing English translations.
|
49
|
+
def extract_english_translations(content)
|
50
|
+
translations_block = /język\s+angielski(?:.|\n)+\{\{znaczenia\}\}(.|\n)+(?:\{\{odmiana){1,}/.match(content)
|
51
|
+
return [] unless translations_block.instance_of?(MatchData)
|
52
|
+
translations_block = translations_block[0].gsub(/odmiana(.|\n)+$/,'')
|
53
|
+
translations = translations_block.scan(/:\s*\(\d\.?\d?\)\s*([^\n]+)/)
|
54
|
+
translations.map! do |translation|
|
55
|
+
translation[0].gsub(/\[|\]|\{\{[^\}]+\}\}|'|<.*/,'').strip
|
56
|
+
end
|
57
|
+
translations.delete_if(&:empty?)
|
58
|
+
translations ||= []
|
59
|
+
end
|
60
|
+
|
61
|
+
def get_examples(content)
|
62
|
+
|
60
63
|
end
|
61
64
|
end
|
62
65
|
end
|
@@ -1,4 +1,6 @@
|
|
1
1
|
# -*- coding: utf-8 -*
|
2
|
+
|
3
|
+
require_relative './vcr_setup'
|
2
4
|
require 'dict/cli/runner'
|
3
5
|
|
4
6
|
describe "parameters_valid?" do
|
@@ -34,17 +36,21 @@ end
|
|
34
36
|
|
35
37
|
describe "get_translations" do
|
36
38
|
it "should return results from wiktionary and dictpl for word 'słowik'" do
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
39
|
+
VCR.use_cassette('translations_slownik_cassette') do
|
40
|
+
stub_const("ARGV", ["słowik"])
|
41
|
+
runner = Dict::CLI::Runner.new
|
42
|
+
opts = runner.parse_parameters
|
43
|
+
runner.get_translations(opts, "słowik").should == {"wiktionary"=>{"słowik"=>["nightingale"]}, "dictpl"=>{"słowik"=>["nightingale"], "słowik białobrewy; Luscinia indicus; Tarsiger indicus (gatunek ptaka)"=>["white-browed bush-robin"], "słowik białosterny; Luscinia pectoralis (gatunek ptaka)"=>["Himalayan rubythroat", "white-tailed rubythroat"], "słowik chiński; pekińczyk żółty; Leiothrix lutea"=>["Pekin robin", "red-billed leiothrix"], "słowik chiński; pekińczyk żółty; pekińczyk koralodzioby; Leiothrix lutea"=>["Peking robin"], "słowik czarnogardły; Luscinia obscura"=>["black-throated blue robin"], "słowik himalajski; Luscinia brunnea (gatunek ptaka)"=>["Indian blue chat", "Indian blue robin"], "słowik modry; Luscinia cyane"=>["Siberian blue robin"], "słowik obrożny; Luscinia johnstoniae; Tarsiger johnstoniae (gatunek ptaka)"=>["collared bush-robin"]}}
|
44
|
+
end
|
41
45
|
end
|
42
46
|
|
43
47
|
it "should return results from selected dictionary for word 'słowik'" do
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
+
VCR.use_cassette('translations_slownik_cassette') do
|
49
|
+
stub_const("ARGV", ["słowik", "-d", "dictpl"])
|
50
|
+
runner = Dict::CLI::Runner.new
|
51
|
+
opts = runner.parse_parameters
|
52
|
+
runner.get_translations(opts, "słowik").should == {"słowik"=>["nightingale"], "słowik białobrewy; Luscinia indicus; Tarsiger indicus (gatunek ptaka)"=>["white-browed bush-robin"], "słowik białosterny; Luscinia pectoralis (gatunek ptaka)"=>["Himalayan rubythroat", "white-tailed rubythroat"], "słowik chiński; pekińczyk żółty; Leiothrix lutea"=>["Pekin robin", "red-billed leiothrix"], "słowik chiński; pekińczyk żółty; pekińczyk koralodzioby; Leiothrix lutea"=>["Peking robin"], "słowik czarnogardły; Luscinia obscura"=>["black-throated blue robin"], "słowik himalajski; Luscinia brunnea (gatunek ptaka)"=>["Indian blue chat", "Indian blue robin"], "słowik modry; Luscinia cyane"=>["Siberian blue robin"], "słowik obrożny; Luscinia johnstoniae; Tarsiger johnstoniae (gatunek ptaka)"=>["collared bush-robin"]}
|
53
|
+
end
|
48
54
|
end
|
49
55
|
|
50
56
|
it "should return timeout message for word słowik and -t 5" do
|
data/spec/dict/lib_dict_spec.rb
CHANGED
@@ -12,10 +12,25 @@ describe Dict do
|
|
12
12
|
|
13
13
|
it "should return array of available services, which contains wiktionary and dictpl" do
|
14
14
|
Dict.available_dictionaries.should == ['wiktionary', 'dictpl']
|
15
|
-
end
|
16
|
-
|
15
|
+
end
|
17
16
|
|
18
|
-
|
17
|
+
it "should return hash with translations from all dictionaries" do
|
18
|
+
wiktionary = stub(:translate => stub(:translations => {'WORD' => 'WIKTIONARY_RESULTS'}))
|
19
|
+
Dict::Wiktionary.should_receive(:new).with('WORD').and_return(wiktionary)
|
20
|
+
|
21
|
+
dictpl = stub(:translate =>{'WORD' => 'DICTPL_RESULTS'})
|
22
|
+
Dict::Dictpl.should_receive(:new).with('WORD').and_return(dictpl)
|
23
|
+
|
24
|
+
results = Dict::get_all_dictionaries_translations('WORD')
|
25
|
+
results.should == {'dictpl' => {'WORD' => 'DICTPL_RESULTS'}, 'wiktionary' => {'WORD' => 'WIKTIONARY_RESULTS'}}
|
26
|
+
end
|
27
|
+
|
28
|
+
it "should return whatever Wiktionary returns embedded in a hash" do
|
29
|
+
wiktionary = stub(:translate => stub( :translations => 'WIKTIONARY_RESULTS'))
|
30
|
+
Dict::Wiktionary.should_receive(:new).with('WORD').and_return(wiktionary)
|
31
|
+
Dict.get_single_dictionary_translations('WORD', 'wiktionary').should == 'WIKTIONARY_RESULTS'
|
32
|
+
end
|
33
|
+
|
19
34
|
it "should return whatever Dictpl returns embedded in a hash" do
|
20
35
|
dictpl = stub(:translate => 'DICTPL_RESULTS')
|
21
36
|
Dict::Dictpl.should_receive(:new).with('WORD').and_return(dictpl)
|
@@ -10,7 +10,7 @@ describe Dict::Dictpl do
|
|
10
10
|
end
|
11
11
|
|
12
12
|
it "should return array with translations for 'krowa'" do
|
13
|
-
VCR.use_cassette('
|
13
|
+
VCR.use_cassette('dictpl_translations_krowa_cassette') do
|
14
14
|
d = Dict::Dictpl.new("krowa")
|
15
15
|
d.translate
|
16
16
|
d.translations.should be_a(Array)
|
@@ -19,7 +19,7 @@ describe Dict::Dictpl do
|
|
19
19
|
end
|
20
20
|
|
21
21
|
it "should return array with examples for 'krowa'" do
|
22
|
-
VCR.use_cassette('
|
22
|
+
VCR.use_cassette('dictpl_examples_krowa_cassette') do
|
23
23
|
d = Dict::Dictpl.new("krowa")
|
24
24
|
d.translate
|
25
25
|
d.examples.should be_a(Array)
|
@@ -28,7 +28,7 @@ describe Dict::Dictpl do
|
|
28
28
|
end
|
29
29
|
|
30
30
|
it "should return array with translations for 'samochód'" do
|
31
|
-
VCR.use_cassette('
|
31
|
+
VCR.use_cassette('dictpl_samochod_cassette') do
|
32
32
|
d = Dict::Dictpl.new('samochód')
|
33
33
|
d.translate
|
34
34
|
d.translations.should be_a(Array)
|
@@ -37,7 +37,7 @@ describe Dict::Dictpl do
|
|
37
37
|
end
|
38
38
|
|
39
39
|
it "should return array with examples for 'samochód'" do
|
40
|
-
VCR.use_cassette('
|
40
|
+
VCR.use_cassette('dictpl_samochod_cassette') do
|
41
41
|
d = Dict::Dictpl.new('samochód')
|
42
42
|
d.translate
|
43
43
|
d.examples.should be_a(Array)
|
@@ -46,7 +46,7 @@ describe Dict::Dictpl do
|
|
46
46
|
end
|
47
47
|
|
48
48
|
it "should return a hash from array of paired values" do
|
49
|
-
VCR.use_cassette('
|
49
|
+
VCR.use_cassette('dictpl_samochod_cassette') do
|
50
50
|
d = Dict::Dictpl.new('samochód')
|
51
51
|
d.make_hash_results(d.translate).should be_a(Hash)
|
52
52
|
end
|
@@ -1,30 +1,43 @@
|
|
1
1
|
# -*- encoding: utf-8 -*
|
2
2
|
|
3
|
+
require_relative './vcr_setup'
|
3
4
|
require 'dict/wiktionary'
|
4
5
|
|
5
6
|
describe Dict::Wiktionary do
|
6
7
|
|
7
8
|
it "should raise no given word exception" do
|
8
|
-
expect { Dict::Wiktionary.new }.to raise_error ArgumentError
|
9
|
+
expect { Dict::Wiktionary.new }.to raise_error ArgumentError
|
9
10
|
end
|
10
11
|
|
11
12
|
it "should return an two element array of translations of word samochód containing [\"car\",\"automobile\"]" do
|
12
|
-
|
13
|
-
|
13
|
+
VCR.use_cassette('wiktionary_translations_samochod_cassette') do
|
14
|
+
w = Dict::Wiktionary.new("samochód").translate
|
15
|
+
w.translations.should == {"samochód"=>["car", "automobile"]}
|
16
|
+
end
|
14
17
|
end
|
15
18
|
|
16
19
|
it "should return a hash with translations" do
|
17
|
-
|
18
|
-
|
20
|
+
VCR.use_cassette('wiktionary_translations_samochod_cassette') do
|
21
|
+
w = Dict::Wiktionary.new("samochód").translate
|
22
|
+
w.translations.should be_a(Hash)
|
23
|
+
end
|
19
24
|
end
|
20
25
|
|
21
|
-
it "should return a
|
22
|
-
|
23
|
-
|
26
|
+
it "should return a Result object" do
|
27
|
+
VCR.use_cassette('wiktionary_translations_samochod_cassette') do
|
28
|
+
w = Dict::Wiktionary.new("samochód").translate
|
29
|
+
w.should be_a(Dict::Result)
|
30
|
+
end
|
24
31
|
end
|
25
32
|
|
26
33
|
it "should return translation from english to polish for word 'field'" do
|
27
|
-
|
28
|
-
|
34
|
+
VCR.use_cassette('wiktionary_translations_field_cassette') do
|
35
|
+
w = Dict::Wiktionary.new("field").translate.translations
|
36
|
+
w.should eq({"field"=>["pole", "pole (magnetyczne, elektryczne, sił, itp.)", "pole (skalarne, wektorowe, itp.)", "ciało (liczb rzeczywistych, zespolonych, itp.)", "wystawić (drużynę)", "odpowiadać (na pytania)", "polowy", "polny"]})
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
it "should remove html tags from translations of 'dragon' word" do
|
41
|
+
Dict::Wiktionary.new("dragon").translate.translations.should eq({'dragon' => ['smok']})
|
29
42
|
end
|
30
43
|
end
|