cambridge_dict 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +13 -6
- data/config.ru +2 -1
- data/lib/cambridge_dict/server.rb +29 -0
- data/lib/cambridge_dict/version.rb +1 -1
- data/lib/cambridge_dict.rb +19 -35
- metadata +8 -35
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8c7205facd81e402d36a810dc1b595547207b0f6e3168193fd4bcb56b5c2f9ee
|
4
|
+
data.tar.gz: 29adecbe8c3d0f96e59f8b43ff1b34fc093291af9a2649568ad716104aa78792
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7dfa104d86a45b96b7dac3b7a97beb99c14f662ddc25b58b2e58e2e262d82f8730458b39e5e6ffb2aca9824188bb531e54bbcd38f0841e4777c561777269c13c
|
7
|
+
data.tar.gz: 2e8f2f7f5ceac4f88568b7752d950add874c99472671b5ff04e86c3330042de34774462eb6a42a0833b1d252ed808619c455a5dc24d80dca82b42e324c629673
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# Cambridge Dict
|
2
2
|
|
3
|
-
Look up words for
|
3
|
+
Look up words for Cambridge via code or web server.
|
4
4
|
|
5
5
|
## Installation
|
6
6
|
|
@@ -35,16 +35,17 @@ return if result.nil?
|
|
35
35
|
{:id=>5, :type=>"Past tense", :text=>"backed"},
|
36
36
|
{:id=>6, :type=>"Past participle", :text=>"backed"},
|
37
37
|
{:id=>7, :type=>"Present participle", :text=>"backing"}],
|
38
|
-
:
|
38
|
+
:pronunciations=>
|
39
39
|
[{:pos=>"adverb", :lang=>"us", :url=>"https://dictionary.cambridge.org/us/media/english/us_pron/b/bac/back_/back.mp3", :pron=>"/bæk/"},
|
40
40
|
{:pos=>"adverb", :lang=>"uk", :url=>"https://dictionary.cambridge.org/us/media/english/uk_pron/u/uka/ukazt/ukazt__024.mp3", :pron=>"/bæk/"},
|
41
41
|
{:pos=>"noun", :lang=>"us", :url=>"https://dictionary.cambridge.org/us/media/english/us_pron/b/bac/back_/back.mp3", :pron=>"/bæk/"},
|
42
42
|
{:pos=>"noun", :lang=>"uk", :url=>"https://dictionary.cambridge.org/us/media/english/uk_pron/u/uka/ukazt/ukazt__024.mp3", :pron=>"/bæk/"},
|
43
43
|
{:pos=>"verb", :lang=>"us", :url=>"https://dictionary.cambridge.org/us/media/english/us_pron/b/bac/back_/back.mp3", :pron=>"/bæk/"},
|
44
44
|
...
|
45
|
-
:
|
45
|
+
:definitions=>
|
46
46
|
[{:id=>0,
|
47
47
|
:pos=>"adverb",
|
48
|
+
:headword=>"back",
|
48
49
|
:source=>"cald4-us",
|
49
50
|
:text=>"in, into, or toward a previous place or condition, or an earlier time: ",
|
50
51
|
:translation=>nil,
|
@@ -59,6 +60,7 @@ return if result.nil?
|
|
59
60
|
{:id=>7, :text=>"This tradition dates back to the 16th century.", :translation=>nil}]},
|
60
61
|
{:id=>1,
|
61
62
|
:pos=>"adverb",
|
63
|
+
:headword=>"back",
|
62
64
|
:source=>"cald4-us",
|
63
65
|
:text=>"in return: ",
|
64
66
|
:translation=>nil,
|
@@ -67,11 +69,16 @@ return if result.nil?
|
|
67
69
|
## Webserver Setup
|
68
70
|
|
69
71
|
```
|
72
|
+
# add sinatra and sinatra-cors to gemfile
|
73
|
+
|
70
74
|
# config.ru
|
71
|
-
require '
|
72
|
-
|
75
|
+
require 'cambridge_dict'
|
76
|
+
require 'cambridge_dict/server'
|
77
|
+
run CambridgeDict::Server
|
78
|
+
|
79
|
+
rackup config.ru
|
73
80
|
|
74
|
-
|
81
|
+
# curl localhost:9292/api/dictionary/:language/:word
|
75
82
|
```
|
76
83
|
|
77
84
|
## Development
|
data/config.ru
CHANGED
[hunk (+2 -1) not present in this extract]
data/lib/cambridge_dict/server.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'sinatra'
|
2
|
+
require 'sinatra/cors'
|
3
|
+
|
4
|
+
module CambridgeDict
|
5
|
+
class Server < Sinatra::Base
|
6
|
+
register Sinatra::Cors
|
7
|
+
|
8
|
+
set :allow_origin, "*"
|
9
|
+
set :allow_methods, "GET,HEAD,POST"
|
10
|
+
set :allow_headers, "content-type,if-modified-since"
|
11
|
+
set :expose_headers, "location,link"
|
12
|
+
|
13
|
+
client = Client.new
|
14
|
+
|
15
|
+
get '/api/dictionary/:language/:entry' do
|
16
|
+
content_type :json
|
17
|
+
|
18
|
+
result = client.lookup(params[:entry], language: params[:language])
|
19
|
+
|
20
|
+
if result && !result[:word].empty?
|
21
|
+
status 200
|
22
|
+
result.to_json
|
23
|
+
else
|
24
|
+
status 404
|
25
|
+
{error: "word not found"}.to_json
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
data/lib/cambridge_dict.rb
CHANGED
@@ -1,13 +1,14 @@
|
|
1
1
|
require 'nokogiri'
|
2
2
|
require 'httparty'
|
3
|
-
|
4
|
-
|
5
|
-
require 'cambridge_dict/version'
|
3
|
+
|
4
|
+
require_relative 'cambridge_dict/version'
|
6
5
|
|
7
6
|
module CambridgeDict
|
8
7
|
class Error < StandardError; end
|
9
8
|
|
10
9
|
class Client
|
10
|
+
PARSER = URI::Parser.new
|
11
|
+
|
11
12
|
def initialize
|
12
13
|
@base_url = "https://dictionary.cambridge.org"
|
13
14
|
@wiki_base = "https://simple.wiktionary.org/wiki"
|
@@ -23,7 +24,8 @@ module CambridgeDict
|
|
23
24
|
|
24
25
|
def lookup(word, language: 'en')
|
25
26
|
nation, dict_lang = parse_language(language)
|
26
|
-
|
27
|
+
encoded_word = word.gsub /[\s']/, "-"
|
28
|
+
url = "#{@base_url}/#{nation}/dictionary/#{dict_lang}/#{encoded_word}"
|
27
29
|
|
28
30
|
response = HTTParty.get(url, {
|
29
31
|
headers: {"User-Agent" => "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.3"},
|
@@ -31,7 +33,9 @@ module CambridgeDict
|
|
31
33
|
return nil unless response.success?
|
32
34
|
|
33
35
|
doc = Nokogiri::HTML(response.body)
|
34
|
-
|
36
|
+
|
37
|
+
encoded_word = PARSER.escape(word)
|
38
|
+
wiki_verbs = fetch_verbs("#{@wiki_base}/#{encoded_word}")
|
35
39
|
|
36
40
|
word = extract_word(doc)
|
37
41
|
|
@@ -41,8 +45,8 @@ module CambridgeDict
|
|
41
45
|
word: word,
|
42
46
|
pos: extract_pos(doc),
|
43
47
|
verbs: wiki_verbs,
|
44
|
-
|
45
|
-
|
48
|
+
pronunciations: extract_pronunciation(doc),
|
49
|
+
definitions: extract_definitions(doc)
|
46
50
|
}
|
47
51
|
end
|
48
52
|
|
@@ -74,7 +78,9 @@ module CambridgeDict
|
|
74
78
|
end
|
75
79
|
|
76
80
|
def fetch_verbs(url)
|
77
|
-
response = HTTParty.get(url
|
81
|
+
response = HTTParty.get(url, {
|
82
|
+
headers: {"User-Agent" => "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.3"},
|
83
|
+
})
|
78
84
|
return [] unless response.success?
|
79
85
|
|
80
86
|
doc = Nokogiri::HTML(response.body)
|
@@ -93,7 +99,7 @@ module CambridgeDict
|
|
93
99
|
end
|
94
100
|
|
95
101
|
def extract_word(doc)
|
96
|
-
doc.at_css('.
|
102
|
+
doc.at_css('.superentry .tb')&.text
|
97
103
|
end
|
98
104
|
|
99
105
|
def extract_pos(doc)
|
@@ -140,13 +146,15 @@ module CambridgeDict
|
|
140
146
|
end
|
141
147
|
|
142
148
|
pos = entry&.at_css('.pos.dpos')&.text
|
149
|
+
headword = entry&.at_css('.headword .hw')&.text
|
143
150
|
|
144
151
|
definition = {
|
145
152
|
id: index,
|
153
|
+
headword: headword,
|
146
154
|
pos: pos,
|
147
155
|
source: source || nil,
|
148
|
-
text: block.at_css('.def.ddef_d.db')&.text,
|
149
|
-
translation: block.at_css('.def-body.ddef_b > span.trans.dtrans')&.text,
|
156
|
+
text: block.at_css('.def.ddef_d.db')&.text&.strip,
|
157
|
+
translation: block.at_css('.def-body.ddef_b > span.trans.dtrans')&.text&.strip,
|
150
158
|
example: extract_examples(block)
|
151
159
|
}
|
152
160
|
|
@@ -171,28 +179,4 @@ module CambridgeDict
|
|
171
179
|
end
|
172
180
|
end
|
173
181
|
|
174
|
-
class API < Sinatra::Base
|
175
|
-
register Sinatra::Cors
|
176
|
-
|
177
|
-
set :allow_origin, "*"
|
178
|
-
set :allow_methods, "GET,HEAD,POST"
|
179
|
-
set :allow_headers, "content-type,if-modified-since"
|
180
|
-
set :expose_headers, "location,link"
|
181
|
-
|
182
|
-
client = Client.new
|
183
|
-
|
184
|
-
get '/api/dictionary/:language/:entry' do
|
185
|
-
content_type :json
|
186
|
-
|
187
|
-
result = client.lookup(params[:entry], language: params[:language])
|
188
|
-
|
189
|
-
if result && !result[:word].empty?
|
190
|
-
status 200
|
191
|
-
result.to_json
|
192
|
-
else
|
193
|
-
status 404
|
194
|
-
{error: "word not found"}.to_json
|
195
|
-
end
|
196
|
-
end
|
197
|
-
end
|
198
182
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cambridge_dict
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben D'Angelo
|
@@ -14,58 +14,30 @@ dependencies:
|
|
14
14
|
name: nokogiri
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '1.15'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.15'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: httparty
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '0.21'
|
34
|
-
type: :runtime
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - "~>"
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '0.21'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: sinatra
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - "~>"
|
31
|
+
- - ">="
|
46
32
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
33
|
+
version: 0.1.0
|
48
34
|
type: :runtime
|
49
35
|
prerelease: false
|
50
36
|
version_requirements: !ruby/object:Gem::Requirement
|
51
37
|
requirements:
|
52
|
-
- - "
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '3.1'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: sinatra-cors
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - "~>"
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '1.2'
|
62
|
-
type: :runtime
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - "~>"
|
38
|
+
- - ">="
|
67
39
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
40
|
+
version: 0.1.0
|
69
41
|
- !ruby/object:Gem::Dependency
|
70
42
|
name: rspec
|
71
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -119,6 +91,7 @@ files:
|
|
119
91
|
- Rakefile
|
120
92
|
- config.ru
|
121
93
|
- lib/cambridge_dict.rb
|
94
|
+
- lib/cambridge_dict/server.rb
|
122
95
|
- lib/cambridge_dict/version.rb
|
123
96
|
homepage: https://codeberg.org/bendangelo/cambridge_dict
|
124
97
|
licenses:
|