dict 0.3.3 → 0.3.4
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -1
- data/.rspec +2 -2
- data/.travis.yml +14 -14
- data/Gemfile +4 -4
- data/Gemfile.lock +40 -40
- data/README.md +34 -34
- data/Rakefile +8 -8
- data/bin/dict +7 -7
- data/dict.gemspec +28 -28
- data/lib/dict/cli/runner.rb +137 -115
- data/lib/dict/dict.rb +40 -53
- data/lib/dict/dictionary.rb +45 -62
- data/lib/dict/glosbe.rb +68 -66
- data/lib/dict/result.rb +38 -38
- data/lib/dict/version.rb +3 -3
- data/lib/dict/wiktionary.rb +91 -91
- data/lib/dict.rb +5 -5
- data/spec/dict/lib_dict_cli_runner_spec.rb +120 -120
- data/spec/dict/lib_dict_spec.rb +39 -39
- data/spec/dict/lib_glosbe_spec.rb +48 -34
- data/spec/dict/lib_wiktionary_spec.rb +68 -62
- data/spec/dict/spec_helper.rb +17 -17
- data/spec/dict/vcr_cassettes/glosbe_translations_asdfff_cassette.yml +1518 -0
- data/spec/dict/vcr_cassettes/glosbe_translations_atomic_cassette.yml +2794 -2794
- data/spec/dict/vcr_cassettes/glosbe_translations_usage_cassette.yml +2530 -0
- data/spec/dict/vcr_cassettes/glosbe_translations_woda_cassette.yml +1949 -1949
- data/spec/dict/vcr_cassettes/slowik_runner_cassette.yml +4178 -4178
- data/spec/dict/vcr_cassettes/translations_dragon_cassette.yml +8659 -8659
- data/spec/dict/vcr_cassettes/translations_slownik_cassette.yml +4177 -4177
- data/spec/dict/vcr_cassettes/wiktionary_no_usage_examples.yml +8634 -0
- data/spec/dict/vcr_cassettes/wiktionary_translate_result_uppercase.yml +6120 -0
- data/spec/dict/vcr_cassettes/wiktionary_translations_field_cassette.yml +8600 -8600
- data/spec/dict/vcr_cassettes/wiktionary_translations_samochod_cassette.yml +6140 -6140
- data/spec/dict/vcr_cassettes/wiktionary_usage_examples_kot.yml +6444 -0
- data/spec/dict/vcr_setup.rb +19 -19
- metadata +9 -4
data/lib/dict/dictionary.rb
CHANGED
@@ -1,62 +1,45 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
|
3
|
-
# It is a base class for classes fetching results from Web dictionaries.
|
4
|
-
|
5
|
-
require 'open-uri'
|
6
|
-
require 'dict/result'
|
7
|
-
|
8
|
-
module Dict
|
9
|
-
class Dictionary
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
@
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
end
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
end
|
47
|
-
|
48
|
-
class ConnectError < Exception
|
49
|
-
attr_reader :original
|
50
|
-
|
51
|
-
def initialize(original = $!)
|
52
|
-
@original = original
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
private
|
57
|
-
# Returns a word with all downcased letters, including polish
|
58
|
-
def downcase_word(word)
|
59
|
-
word.downcase.gsub(/[ĄĆĘŁŃÓŚŹŻ]/, 'Ą' => 'ą', 'Ć' => 'ć', 'Ę' => 'ę', 'Ł' => 'ł', 'Ń' => 'ń', 'Ó' => 'ó', 'Ś' => 'ś', 'Ź' => 'ź', 'Ż' => 'ż')
|
60
|
-
end
|
61
|
-
end
|
62
|
-
end
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
# It is a base class for classes fetching results from Web dictionaries.
|
4
|
+
|
5
|
+
require 'open-uri'
|
6
|
+
require 'dict/result'
|
7
|
+
|
8
|
+
module Dict
  # Base class for the classes that fetch results from Web dictionaries.
  # It normalizes the looked-up word, creates the Dict::Result that the
  # subclasses fill in, and builds escaped URIs.
  class Dictionary
    # Raised to signal that a dictionary site could not be reached.
    #
    # Inherits from StandardError rather than Exception so it behaves like an
    # ordinary application error; `rescue ConnectError` and `rescue Exception`
    # clauses catch it exactly as before.
    class ConnectError < StandardError
      # The exception that caused the failure (nil when there was none).
      attr_reader :original

      # original - the underlying exception. Defaults to $!, the exception
      #            currently being handled, which is nil outside a rescue.
      def initialize(original = $!)
        super()
        @original = original
      end
    end

    # Returns the text reporting that a dictionary is not in the database.
    def self.message
      "There's no such dictionary in database."
    end

    # word - the term to look up; it is stored downcased.
    #
    # Raises ArgumentError when no word is given.
    def initialize(word)
      check_arguments(word)
      @word = downcase_word(word)
      @result = Dict::Result.new(@word)
    end

    # Builds a URI object (URI::HTTP for http addresses).
    #
    # url  - the base address.
    # word - optional term appended to url after being downcased and having
    #        its spaces replaced with underscores.
    #
    # The whole address is percent-escaped before it is parsed.
    def uri(url, word = nil)
      address = word.nil? ? url : url + word.downcase.tr(' ', '_')
      # URI.escape was removed in Ruby 3.0; the parser-level #escape it
      # delegated to is still available.
      URI(URI::DEFAULT_PARSER.escape(address))
    end

    # Checks if word was given correctly.
    #
    # Raises ArgumentError for nil as well as for an empty word.
    def check_arguments(word)
      raise ArgumentError, 'No given word' if word.nil? || word.empty?
    end

    private

    # Returns the word with all letters downcased, including the Polish
    # diacritics. The explicit mapping covers Ruby versions whose
    # String#downcase only handles ASCII letters.
    def downcase_word(word)
      word.downcase.tr('ĄĆĘŁŃÓŚŹŻ', 'ąćęłńóśźż')
    end
  end
end
|
data/lib/dict/glosbe.rb
CHANGED
@@ -1,66 +1,68 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
|
3
|
-
require 'nokogiri'
|
4
|
-
require 'dict/dictionary'
|
5
|
-
|
6
|
-
GLOSBE_PL = 'http://glosbe.com/pl/en/' # polish - english
|
7
|
-
GLOSBE_EN = 'http://glosbe.com/en/pl/' # english - polish
|
8
|
-
|
9
|
-
module Dict
|
10
|
-
class Glosbe < Dictionary
|
11
|
-
# returns an Dict::Result object
|
12
|
-
def translate
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
doc
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
end
|
40
|
-
|
41
|
-
# returns array with structure as shown below from the given dictionary link
|
42
|
-
# ['TRANSLATION1', 'TRANSLATION2', ...]
|
43
|
-
def get_translations(doc)
|
44
|
-
|
45
|
-
translations
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
#
|
55
|
-
#
|
56
|
-
|
57
|
-
|
58
|
-
examples
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
end
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'dict/dictionary'
|
5
|
+
|
6
|
+
GLOSBE_PL = 'http://glosbe.com/pl/en/'.freeze # Polish -> English
GLOSBE_EN = 'http://glosbe.com/en/pl/'.freeze # English -> Polish

module Dict
  # Fetches translations and usage examples of a word from glosbe.com.
  class Glosbe < Dictionary
    # Looks the word up on the Polish -> English page first and, when that
    # page does not qualify (see #is_polish?), on the English -> Polish page.
    # Translations and examples found are added to the result.
    #
    # Returns the Dict::Result object.
    def translate
      doc = get_content(GLOSBE_PL, @word)
      examples_lang = 'en'

      unless is_polish?(doc)
        doc = get_content(GLOSBE_EN, @word)
        examples_lang = 'pl'
      end

      add_translations(get_translations(doc))
      add_examples(get_examples(doc, examples_lang))
      @result
    end

    private

    # Checks if the given word is Polish: true when the fetched content is
    # non-empty and holds no '.content_box_rounded p' element.
    # NOTE(review): presumably Glosbe renders that paragraph when the phrase
    # is unknown - verify against the live markup.
    def is_polish?(doc)
      !doc.empty? && doc.at_css('.content_box_rounded p').nil?
    end

    # Downloads the page for word and returns its '.wordDetails' nodes.
    #
    # Any StandardError (network failure, HTTP error, ...) is deliberately
    # swallowed and reported as an empty String, so callers only ever need
    # to test the result with #empty?.
    def get_content(url, word)
      # URI#open instead of Kernel#open: since Ruby 3.0 Kernel#open no longer
      # opens URLs. The block form also closes the handle once parsed.
      uri(url, word).open { |page| Nokogiri::HTML(page).css('.wordDetails') }
    rescue StandardError
      ''
    end

    # Returns the downcased translations found in doc:
    # ['TRANSLATION1', 'TRANSLATION2', ...]
    def get_translations(doc)
      return [] if doc.empty?

      doc.css('.phrase-container > .translation').map { |node| node.text.downcase }
    end

    # Adds the obtained translations to the Dict::Result object.
    def add_translations(translations)
      translations.each { |translation| @result.add_translation(@result.term, translation) }
    end

    # Returns the capitalized usage examples found in doc:
    # ['EXAMPLE1', 'EXAMPLE2', ...]
    #
    # lang   - value of the lang attribute of the cells to read.
    # length - only examples shorter than this many characters are kept
    #          (60 by default).
    def get_examples(doc, lang, length = 60)
      return [] if doc.empty?

      doc.css(".tranlastionMemory td[lang=#{lang}]")
         .map(&:text)
         .select { |text| text.length < length }
         .map(&:capitalize)
    end

    # Adds the obtained examples to the Dict::Result object.
    def add_examples(examples)
      examples.each { |example| @result.add_example(@result.term, example) }
    end
  end
end
|
data/lib/dict/result.rb
CHANGED
@@ -1,38 +1,38 @@
|
|
1
|
-
# Objects of this class are returned by methods retrieving translations
|
2
|
-
# from Web dictionaries.
|
3
|
-
|
4
|
-
module Dict
|
5
|
-
class Result
|
6
|
-
attr_reader :term, :translations, :examples
|
7
|
-
|
8
|
-
def initialize(term)
|
9
|
-
@term = term
|
10
|
-
@translations = {}
|
11
|
-
@examples = {}
|
12
|
-
end
|
13
|
-
|
14
|
-
def add_translation(term, translation)
|
15
|
-
add_result(@translations, term, translation)
|
16
|
-
end
|
17
|
-
|
18
|
-
def add_example(term, example)
|
19
|
-
add_result(@examples, term, example)
|
20
|
-
end
|
21
|
-
|
22
|
-
def each_translation
|
23
|
-
@translations.each_pair do |term,translation|
|
24
|
-
yield term, translation
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
private
|
29
|
-
def add_result(hash, key, value)
|
30
|
-
if hash.has_key?(key)
|
31
|
-
hash[key].push(value)
|
32
|
-
else
|
33
|
-
hash.merge!({ key => [value] })
|
34
|
-
end
|
35
|
-
self
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
1
|
+
# Objects of this class are returned by methods retrieving translations
|
2
|
+
# from Web dictionaries.
|
3
|
+
|
4
|
+
module Dict
  # Holds a looked-up term together with its translations and usage
  # examples. Both collections are Hashes mapping a term to an Array of
  # values, in the order they were added.
  class Result
    attr_reader :term, :translations, :examples

    # term - the word the result is about.
    def initialize(term)
      @term = term
      @translations = {}
      @examples = {}
    end

    # Appends translation to the translations of term. Returns self.
    def add_translation(term, translation)
      add_result(@translations, term, translation)
    end

    # Appends example to the usage examples of term. Returns self.
    def add_example(term, example)
      add_result(@examples, term, example)
    end

    # Yields every term with the Array of its translations.
    #
    # Returns an Enumerator when called without a block, so the method can
    # be chained instead of raising LocalJumpError.
    def each_translation
      return enum_for(:each_translation) unless block_given?

      @translations.each_pair { |term, translation| yield term, translation }
    end

    private

    # Appends value to the Array stored under key, creating the Array on
    # first use. Returns self.
    def add_result(hash, key, value)
      (hash[key] ||= []) << value
      self
    end
  end
end
|
data/lib/dict/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
module Dict
|
2
|
-
VERSION = "0.3.3"
|
3
|
-
end
|
1
|
+
module Dict
  # Version of the dict gem. Frozen so the shared constant cannot be
  # modified in place.
  VERSION = '0.3.4'.freeze
end
|
data/lib/dict/wiktionary.rb
CHANGED
@@ -1,91 +1,91 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
|
3
|
-
# Class fetching translations of given word from wiktionary.org.
|
4
|
-
|
5
|
-
require 'nokogiri'
|
6
|
-
require 'dict/dictionary'
|
7
|
-
|
8
|
-
module Dict
|
9
|
-
class Wiktionary < Dictionary
|
10
|
-
|
11
|
-
# Returns an Dict::Result object.
|
12
|
-
|
13
|
-
translations.each do |translation|
|
14
|
-
@result.add_translation(@result.term, translation.gsub(/(\s[^|\s]+\|)/,' '))
|
15
|
-
examples(translation).each { |example| @result.add_example(translation, example) }
|
16
|
-
end
|
17
|
-
|
18
|
-
@result
|
19
|
-
end
|
20
|
-
|
21
|
-
def get_html(url)
|
22
|
-
begin
|
23
|
-
Nokogiri::HTML(open(URI.encode(url)))
|
24
|
-
rescue OpenURI::HTTPError
|
25
|
-
raise Dictionary::ConnectError
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
private
|
30
|
-
def polish?(content)
|
31
|
-
! /==Polish==/i.match(content).nil?
|
32
|
-
end
|
33
|
-
|
34
|
-
# Returns an array containing translations.
|
35
|
-
def translations
|
36
|
-
url_pl = "http://en.wiktionary.org/w/index.php?title=#{@word}&action=edit"
|
37
|
-
url_en = "http://pl.wiktionary.org/w/index.php?title=#{@word}&action=edit"
|
38
|
-
|
39
|
-
content_pl = get_html(url_pl).css('textarea#wpTextbox1').first
|
40
|
-
if polish?(content_pl)
|
41
|
-
@is_polish = true
|
42
|
-
extract_polish_translations(content_pl)
|
43
|
-
else
|
44
|
-
@is_polish = false
|
45
|
-
extract_english_translations(get_html(url_en).css('textarea#wpTextbox1').first.content)
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
# Returns an array containing polish translations.
|
50
|
-
def extract_polish_translations(content)
|
51
|
-
translations = /Noun[^\{]+\{\{(?:head\|pl|pl\-noun)[^#]+#\s*\[\[([^\n]+)/.match(content)
|
52
|
-
translations = (translations && translations[1].gsub(/\[|\]/,'').split(', ')) || []
|
53
|
-
end
|
54
|
-
|
55
|
-
# Returns an array containing english translations.
|
56
|
-
def extract_english_translations(content)
|
57
|
-
translations_block = /język\s+angielski(?:.|\n)+\{\{znaczenia\}\}(.|\n)+(?:\{\{odmiana){1,}/.match(content)
|
58
|
-
return [] unless translations_block.instance_of?(MatchData)
|
59
|
-
translations_block = translations_block[0].gsub(/odmiana(.|\n)+$/,'')
|
60
|
-
translations = translations_block.scan(/:\s*\(\d\.?\d?\)\s*([^\n]+)/)
|
61
|
-
translations.map! do |translation|
|
62
|
-
translation[0].gsub(/\[|\]|\{\{[^\}]+\}\}|'|<.*/,'').strip
|
63
|
-
end
|
64
|
-
translations.delete_if(&:empty?)
|
65
|
-
translations ||= []
|
66
|
-
end
|
67
|
-
|
68
|
-
def examples(word)
|
69
|
-
url_pl = "http://pl.wiktionary.org/w/index.php?title=#{word}&action=edit"
|
70
|
-
|
71
|
-
if @is_polish
|
72
|
-
extract_english_examples(word)
|
73
|
-
else
|
74
|
-
[]
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
# Returns an array containing usage examples of translated polish word to english.
|
79
|
-
def extract_english_examples(word)
|
80
|
-
word = word.gsub(/\s+\(.+$/,'') || ''
|
81
|
-
url_en = "http://en.wiktionary.org/w/index.php?title=#{word}&action=edit"
|
82
|
-
examples = /Noun[^\{]+\{\{en\-noun[^=]+/.match(get_html(url_en.gsub('{word}',word)).css('textarea#wpTextbox1').first)
|
83
|
-
return [] unless examples.instance_of?(MatchData)
|
84
|
-
examples = examples[0].scan(/#: ''([^\n]+)\n/)
|
85
|
-
examples.map! do |translation|
|
86
|
-
translation[0].gsub(/'{2,}/,'')
|
87
|
-
end
|
88
|
-
examples
|
89
|
-
end
|
90
|
-
end
|
91
|
-
end
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
# Class fetching translations of given word from wiktionary.org.
|
4
|
+
|
5
|
+
require 'nokogiri'
|
6
|
+
require 'dict/dictionary'
|
7
|
+
|
8
|
+
module Dict
  # Fetches translations of a word from wiktionary.org by reading the raw
  # wikitext shown on the pages' edit forms.
  class Wiktionary < Dictionary
    # Adds every translation found to the result and, for each of them, its
    # usage examples. Translations are stored under the looked-up term;
    # examples are stored under the translation they belong to.
    #
    # Returns the Dict::Result object.
    def translate
      translations.each do |translation|
        # Collapses a whitespace-preceded token ending in '|' into a single
        # space (presumably the target half of a piped wiki link - verify).
        @result.add_translation(@result.term, translation.gsub(/(\s[^|\s]+\|)/, ' '))
        examples(translation).each { |example| @result.add_example(translation, example) }
      end

      @result
    end

    # Downloads url (percent-escaped first) and returns the parsed
    # Nokogiri HTML document.
    #
    # Raises Dictionary::ConnectError when the server answers with an HTTP
    # error.
    def get_html(url)
      # URI.encode and URL support in Kernel#open were both removed in
      # Ruby 3.0; the parser-level escape and URI#open work on old and new
      # Rubies. The block form closes the handle once parsed.
      URI(URI::DEFAULT_PARSER.escape(url)).open { |page| Nokogiri::HTML(page) }
    rescue OpenURI::HTTPError
      raise Dictionary::ConnectError
    end

    private

    # Returns the wikitext of the edit form of page title on the given
    # wiktionary host, or an empty String when the page has no edit textarea.
    def edit_page_text(host, title)
      textarea = get_html("http://#{host}/w/index.php?title=#{title}&action=edit").css('textarea#wpTextbox1').first
      textarea ? textarea.content : ''
    end

    # True when the wikitext contains a "==Polish==" heading.
    def polish?(content)
      !/==Polish==/i.match(content).nil?
    end

    # Returns an array containing translations.
    #
    # The word is first looked up in the English wiktionary. When it has a
    # Polish section there, the word is treated as Polish; otherwise it is
    # looked up in the Polish wiktionary. The decision is remembered in
    # @is_polish for #examples.
    def translations
      english_wiktionary_text = edit_page_text('en.wiktionary.org', @word)
      @is_polish = polish?(english_wiktionary_text)
      return extract_polish_translations(english_wiktionary_text) if @is_polish

      extract_english_translations(edit_page_text('pl.wiktionary.org', @word))
    end

    # Returns an array containing the translations of a Polish word, read
    # from the first definition line of its Noun section.
    def extract_polish_translations(content)
      definition = /Noun[^\{]+\{\{(?:head\|pl|pl\-noun)[^#]+#\s*\[\[([^\n]+)/.match(content)
      return [] unless definition

      definition[1].gsub(/\[|\]/, '').split(', ')
    end

    # Returns an array containing the translations of an English word, read
    # from the numbered lines of the "znaczenia" block of the Polish
    # wiktionary entry. Wiki markup is stripped and blank entries dropped.
    def extract_english_translations(content)
      meanings = /język\s+angielski(?:.|\n)+\{\{znaczenia\}\}(.|\n)+(?:\{\{odmiana){1,}/.match(content)
      return [] unless meanings

      meanings[0].gsub(/odmiana(.|\n)+$/, '')
                 .scan(/:\s*\(\d\.?\d?\)\s*([^\n]+)/)
                 .map { |captures| captures.first.gsub(/\[|\]|\{\{[^\}]+\}\}|'|<.*/, '').strip }
                 .reject(&:empty?)
    end

    # Returns the usage examples of word. Examples are only looked up when
    # the translated word was Polish; otherwise the array is empty.
    def examples(word)
      @is_polish ? extract_english_examples(word) : []
    end

    # Returns an array containing usage examples of translated polish word
    # to english, read from the Noun section of the English wiktionary entry.
    def extract_english_examples(word)
      # Drop a trailing parenthesized remark so only the page title remains.
      title = word.gsub(/\s+\(.+$/, '')
      noun_section = /Noun[^\{]+\{\{en\-noun[^=]+/.match(edit_page_text('en.wiktionary.org', title))
      return [] unless noun_section

      noun_section[0].scan(/#: ''([^\n]+)\n/).map { |captures| captures.first.gsub(/'{2,}/, '') }
    end
  end
end
|
data/lib/dict.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
require 'dict/dict'
|
2
|
-
|
3
|
-
module Dict
|
4
|
-
autoload :VERSION, "dict/version"
|
5
|
-
end
|
1
|
+
require 'dict/dict'
|
2
|
+
|
3
|
+
# Top-level namespace of the gem.
module Dict
|
4
|
+
# Registers dict/version to be loaded the first time Dict::VERSION is referenced.
autoload :VERSION, "dict/version"
|
5
|
+
end
|