cambridge_dict 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +7 -3
- data/lib/cambridge_dict/version.rb +1 -1
- data/lib/cambridge_dict.rb +17 -8
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8c7205facd81e402d36a810dc1b595547207b0f6e3168193fd4bcb56b5c2f9ee
|
4
|
+
data.tar.gz: 29adecbe8c3d0f96e59f8b43ff1b34fc093291af9a2649568ad716104aa78792
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7dfa104d86a45b96b7dac3b7a97beb99c14f662ddc25b58b2e58e2e262d82f8730458b39e5e6ffb2aca9824188bb531e54bbcd38f0841e4777c561777269c13c
|
7
|
+
data.tar.gz: 2e8f2f7f5ceac4f88568b7752d950add874c99472671b5ff04e86c3330042de34774462eb6a42a0833b1d252ed808619c455a5dc24d80dca82b42e324c629673
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# Cambridge Dict
|
2
2
|
|
3
|
-
Look up words for
|
3
|
+
Look up words for Cambridge via code or web server.
|
4
4
|
|
5
5
|
## Installation
|
6
6
|
|
@@ -35,16 +35,17 @@ return if result.nil?
|
|
35
35
|
{:id=>5, :type=>"Past tense", :text=>"backed"},
|
36
36
|
{:id=>6, :type=>"Past participle", :text=>"backed"},
|
37
37
|
{:id=>7, :type=>"Present participle", :text=>"backing"}],
|
38
|
-
:
|
38
|
+
:pronunciations=>
|
39
39
|
[{:pos=>"adverb", :lang=>"us", :url=>"https://dictionary.cambridge.org/us/media/english/us_pron/b/bac/back_/back.mp3", :pron=>"/bæk/"},
|
40
40
|
{:pos=>"adverb", :lang=>"uk", :url=>"https://dictionary.cambridge.org/us/media/english/uk_pron/u/uka/ukazt/ukazt__024.mp3", :pron=>"/bæk/"},
|
41
41
|
{:pos=>"noun", :lang=>"us", :url=>"https://dictionary.cambridge.org/us/media/english/us_pron/b/bac/back_/back.mp3", :pron=>"/bæk/"},
|
42
42
|
{:pos=>"noun", :lang=>"uk", :url=>"https://dictionary.cambridge.org/us/media/english/uk_pron/u/uka/ukazt/ukazt__024.mp3", :pron=>"/bæk/"},
|
43
43
|
{:pos=>"verb", :lang=>"us", :url=>"https://dictionary.cambridge.org/us/media/english/us_pron/b/bac/back_/back.mp3", :pron=>"/bæk/"},
|
44
44
|
...
|
45
|
-
:
|
45
|
+
:definitions=>
|
46
46
|
[{:id=>0,
|
47
47
|
:pos=>"adverb",
|
48
|
+
:headword=>"back",
|
48
49
|
:source=>"cald4-us",
|
49
50
|
:text=>"in, into, or toward a previous place or condition, or an earlier time: ",
|
50
51
|
:translation=>nil,
|
@@ -59,6 +60,7 @@ return if result.nil?
|
|
59
60
|
{:id=>7, :text=>"This tradition dates back to the 16th century.", :translation=>nil}]},
|
60
61
|
{:id=>1,
|
61
62
|
:pos=>"adverb",
|
63
|
+
:headword=>"back",
|
62
64
|
:source=>"cald4-us",
|
63
65
|
:text=>"in return: ",
|
64
66
|
:translation=>nil,
|
@@ -75,6 +77,8 @@ require 'cambridge_dict/server'
|
|
75
77
|
run CambridgeDict::Server
|
76
78
|
|
77
79
|
rackup config.ru
|
80
|
+
|
81
|
+
# curl localhost:9292/api/dictionary/:language/:word
|
78
82
|
```
|
79
83
|
|
80
84
|
## Development
|
data/lib/cambridge_dict.rb
CHANGED
@@ -7,6 +7,8 @@ module CambridgeDict
|
|
7
7
|
class Error < StandardError; end
|
8
8
|
|
9
9
|
class Client
|
10
|
+
PARSER = URI::Parser.new
|
11
|
+
|
10
12
|
def initialize
|
11
13
|
@base_url = "https://dictionary.cambridge.org"
|
12
14
|
@wiki_base = "https://simple.wiktionary.org/wiki"
|
@@ -22,7 +24,8 @@ module CambridgeDict
|
|
22
24
|
|
23
25
|
def lookup(word, language: 'en')
|
24
26
|
nation, dict_lang = parse_language(language)
|
25
|
-
|
27
|
+
encoded_word = word.gsub /[\s']/, "-"
|
28
|
+
url = "#{@base_url}/#{nation}/dictionary/#{dict_lang}/#{encoded_word}"
|
26
29
|
|
27
30
|
response = HTTParty.get(url, {
|
28
31
|
headers: {"User-Agent" => "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.3"},
|
@@ -30,7 +33,9 @@ module CambridgeDict
|
|
30
33
|
return nil unless response.success?
|
31
34
|
|
32
35
|
doc = Nokogiri::HTML(response.body)
|
33
|
-
|
36
|
+
|
37
|
+
encoded_word = PARSER.escape(word)
|
38
|
+
wiki_verbs = fetch_verbs("#{@wiki_base}/#{encoded_word}")
|
34
39
|
|
35
40
|
word = extract_word(doc)
|
36
41
|
|
@@ -40,8 +45,8 @@ module CambridgeDict
|
|
40
45
|
word: word,
|
41
46
|
pos: extract_pos(doc),
|
42
47
|
verbs: wiki_verbs,
|
43
|
-
|
44
|
-
|
48
|
+
pronunciations: extract_pronunciation(doc),
|
49
|
+
definitions: extract_definitions(doc)
|
45
50
|
}
|
46
51
|
end
|
47
52
|
|
@@ -73,7 +78,9 @@ module CambridgeDict
|
|
73
78
|
end
|
74
79
|
|
75
80
|
def fetch_verbs(url)
|
76
|
-
response = HTTParty.get(url
|
81
|
+
response = HTTParty.get(url, {
|
82
|
+
headers: {"User-Agent" => "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.3"},
|
83
|
+
})
|
77
84
|
return [] unless response.success?
|
78
85
|
|
79
86
|
doc = Nokogiri::HTML(response.body)
|
@@ -92,7 +99,7 @@ module CambridgeDict
|
|
92
99
|
end
|
93
100
|
|
94
101
|
def extract_word(doc)
|
95
|
-
doc.at_css('.
|
102
|
+
doc.at_css('.superentry .tb')&.text
|
96
103
|
end
|
97
104
|
|
98
105
|
def extract_pos(doc)
|
@@ -139,13 +146,15 @@ module CambridgeDict
|
|
139
146
|
end
|
140
147
|
|
141
148
|
pos = entry&.at_css('.pos.dpos')&.text
|
149
|
+
headword = entry&.at_css('.headword .hw')&.text
|
142
150
|
|
143
151
|
definition = {
|
144
152
|
id: index,
|
153
|
+
headword: headword,
|
145
154
|
pos: pos,
|
146
155
|
source: source || nil,
|
147
|
-
text: block.at_css('.def.ddef_d.db')&.text,
|
148
|
-
translation: block.at_css('.def-body.ddef_b > span.trans.dtrans')&.text,
|
156
|
+
text: block.at_css('.def.ddef_d.db')&.text&.strip,
|
157
|
+
translation: block.at_css('.def-body.ddef_b > span.trans.dtrans')&.text&.strip,
|
149
158
|
example: extract_examples(block)
|
150
159
|
}
|
151
160
|
|