cambridge_dict 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: c267c98cdc071799bc99ff46db6963e543bf79ba0a96ce1efb391d27fabcd40c
4
+ data.tar.gz: 3bfc706d541138132c78030a885a227eb3f66cb1405235b5c87022420f204b2c
5
+ SHA512:
6
+ metadata.gz: 6f31eeb418116539c5d61b9487a0bfc1d98c0bfa310c986bf7a4ac99a213c46e98e4032a7bc45f0b85ae87cb82ff2ea043fbd7979c600ad94d2442dc84fbb3ba
7
+ data.tar.gz: defbccf32f337d0ddbefac15a85853e27e0487cf6f4d67f77cde947228bc50fe77bc2d0270d948a26b5fe0dd47b93e891d06d75bf433f0781f0d036d03c3aef8
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2024 Ben D'Angelo
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,72 @@
1
+ # Cambridge Dict
2
+
3
+ Look up words in the Cambridge Dictionary from Ruby code or through the bundled web server.
4
+
5
+ ## Installation
6
+
7
+ ```
8
+ gem install cambridge_dict
9
+ ```
10
+
11
+ ## Client Usage
12
+
13
+ ```
14
+ client = CambridgeDict::Client.new
15
+ result = client.lookup("back", language: "en")
16
+
17
+ =>
18
+ {:word=>"back",
19
+ :pos=>["adverb", "noun", "verb", "adjective"],
20
+ :verbs=>
21
+ [{:id=>0, :type=>"Preposition", :text=>"back"},
22
+ {:id=>1, :type=>"Singular", :text=>"back"},
23
+ {:id=>2, :type=>"Plural", :text=>"backs"},
24
+ {:id=>3, :type=>"Plain form", :text=>"back"},
25
+ {:id=>4, :type=>"Third-person singular", :text=>"backs"},
26
+ {:id=>5, :type=>"Past tense", :text=>"backed"},
27
+ {:id=>6, :type=>"Past participle", :text=>"backed"},
28
+ {:id=>7, :type=>"Present participle", :text=>"backing"}],
29
+ :pronunciation=>
30
+ [{:pos=>"adverb", :lang=>"us", :url=>"https://dictionary.cambridge.org/us/media/english/us_pron/b/bac/back_/back.mp3", :pron=>"/bæk/"},
31
+ {:pos=>"adverb", :lang=>"uk", :url=>"https://dictionary.cambridge.org/us/media/english/uk_pron/u/uka/ukazt/ukazt__024.mp3", :pron=>"/bæk/"},
32
+ {:pos=>"noun", :lang=>"us", :url=>"https://dictionary.cambridge.org/us/media/english/us_pron/b/bac/back_/back.mp3", :pron=>"/bæk/"},
33
+ {:pos=>"noun", :lang=>"uk", :url=>"https://dictionary.cambridge.org/us/media/english/uk_pron/u/uka/ukazt/ukazt__024.mp3", :pron=>"/bæk/"},
34
+ {:pos=>"verb", :lang=>"us", :url=>"https://dictionary.cambridge.org/us/media/english/us_pron/b/bac/back_/back.mp3", :pron=>"/bæk/"},
35
+ ...
36
+ :definition=>
37
+ [{:id=>0,
38
+ :pos=>"adverb",
39
+ :source=>"cald4-us",
40
+ :text=>"in, into, or toward a previous place or condition, or an earlier time: ",
41
+ :translation=>nil,
42
+ :example=>
43
+ [{:id=>0, :text=>"He looked back and saw they were following him.", :translation=>nil},
44
+ {:id=>1, :text=>"When you take the scissors, remember to put them back.", :translation=>nil},
45
+ {:id=>2, :text=>"Looking at her old photographs brought back (= made her remember) a lot of memories.", :translation=>nil},
46
+ {:id=>3, :text=>"He left a note saying \"Gone out. Back soon.\"", :translation=>nil},
47
+ {:id=>4, :text=>"She went to Brazil for two years, but now she's back.", :translation=>nil},
48
+ {:id=>5, :text=>"I was woken by a thunderstorm, and I couldn't get back to sleep (= could not sleep again).", :translation=>nil},
49
+ {:id=>6, :text=>"The last time we saw Lowell was back (= at an earlier time) in January.", :translation=>nil},
50
+ {:id=>7, :text=>"This tradition dates back to the 16th century.", :translation=>nil}]},
51
+ {:id=>1,
52
+ :pos=>"adverb",
53
+ :source=>"cald4-us",
54
+ :text=>"in return: ",
55
+ :translation=>nil,
56
+ ```
57
+
58
+ ## Webserver Setup
59
+
60
+ ```
61
+ # config.ru
62
+ require 'cambridge_dict'
63
+ run CambridgeDict::API
64
+
65
+ rackup config.ru
66
+ ```
67
+
68
+ ## Development
69
+
70
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
71
+
72
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to rubygems.org.
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rspec/core/rake_task"
5
+
6
+ RSpec::Core::RakeTask.new(:spec) # registers a Rake task named `spec`
7
+
8
+ task default: :spec # `default` depends on `spec`, so a bare `rake` runs it
data/config.ru ADDED
@@ -0,0 +1,2 @@
1
# Rack entry point: loads the library and mounts its JSON API.
require './lib/cambridge_dict'

# The library defines its classes under CambridgeDict; the previously
# referenced CambridgeDictionary constant does not exist and raised NameError.
run CambridgeDict::API
@@ -0,0 +1,3 @@
1
module CambridgeDict
  # Release number of the gem. Frozen so the shared constant cannot be
  # mutated in place by code that receives it.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,174 @@
1
require 'erb'

require 'nokogiri'
require 'httparty'
require 'sinatra'
require 'sinatra/cors'

require 'cambridge_dict/version'
6
+
7
+ module CambridgeDict
8
+ class Client
9
+ def initialize
10
+ @base_url = "https://dictionary.cambridge.org"
11
+ @wiki_base = "https://simple.wiktionary.org/wiki"
12
+ end
13
+
14
+ def lookup(word, language: 'en')
15
+ nation, dict_lang = parse_language(language)
16
+ url = "#{@base_url}/#{nation}/dictionary/#{dict_lang}/#{word}"
17
+
18
+ response = HTTParty.get(url, {
19
+ headers: {"User-Agent" => "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.3"},
20
+ })
21
+ return nil unless response.success?
22
+
23
+ doc = Nokogiri::HTML(response.body)
24
+ wiki_verbs = fetch_verbs("#{@wiki_base}/#{word}")
25
+
26
+ {
27
+ word: extract_word(doc),
28
+ pos: extract_pos(doc),
29
+ verbs: wiki_verbs,
30
+ pronunciation: extract_pronunciation(doc),
31
+ definition: extract_definitions(doc)
32
+ }
33
+ end
34
+
35
+ private
36
+
37
+ def parse_language(lang)
38
+ case lang
39
+ when 'en'
40
+ ['us', 'english']
41
+ when 'uk'
42
+ ['uk', 'english']
43
+ when 'en-tw'
44
+ ['us', 'english-chinese-traditional']
45
+ when 'en-cn'
46
+ ['us', 'english-chinese-simplified']
47
+ else
48
+ ['us', 'english']
49
+ end
50
+ end
51
+
52
+ def fetch_verbs(url)
53
+ response = HTTParty.get(url)
54
+ return [] unless response.success?
55
+
56
+ doc = Nokogiri::HTML(response.body)
57
+ verbs = []
58
+
59
+ rows = doc.css('tr > td > p')
60
+
61
+ lines = rows.map {|i| i.children.collect {|c| c.text } }.flatten.map(&:strip).reject(&:empty?)
62
+
63
+ lines.each_slice(2) do |type, text|
64
+ break if verbs.include?({type: type, text: text})
65
+ verbs << {id: verbs.length, type: type, text: text} if type && text
66
+ end
67
+
68
+ verbs
69
+ end
70
+
71
+ def extract_word(doc)
72
+ doc.at_css('.hw.dhw')&.text || ''
73
+ end
74
+
75
+ def extract_pos(doc)
76
+ doc.css('.pos.dpos').map(&:text).uniq
77
+ end
78
+
79
+ def extract_pronunciation(doc)
80
+ audio = []
81
+
82
+ doc.css('.pos-header.dpos-h').each do |header|
83
+ pos_node = header.at_css('.dpos-g')
84
+ next unless pos_node && !pos_node.children.empty?
85
+
86
+ pos = pos_node.children.first.text
87
+ header.css('.dpron-i').each do |node|
88
+ lang = node.at_css('span:first-child')&.text
89
+ source = node.at_css('source')
90
+ next unless source
91
+
92
+ url = @base_url + source['src']
93
+ pron = node.at_css('span:last-child')&.text
94
+
95
+ audio << {
96
+ pos: pos,
97
+ lang: lang,
98
+ url: url,
99
+ pron: pron
100
+ }
101
+ end
102
+ end
103
+
104
+ audio
105
+ end
106
+
107
+ def extract_definitions(doc)
108
+ definitions = []
109
+
110
+ doc.css('.def-block.ddef_block').each_with_index do |block, index|
111
+ entry = block.ancestors('.pr.entry-body__el').first
112
+ source_result = block.ancestors('.pr.dictionary')&.first
113
+
114
+ if source_result
115
+ source = source_result['data-id']
116
+ end
117
+
118
+ pos = entry&.at_css('.pos.dpos')&.text
119
+
120
+ definition = {
121
+ id: index,
122
+ pos: pos,
123
+ source: source || nil,
124
+ text: block.at_css('.def.ddef_d.db')&.text,
125
+ translation: block.at_css('.def-body.ddef_b > span.trans.dtrans')&.text,
126
+ example: extract_examples(block)
127
+ }
128
+
129
+ definitions << definition
130
+ end
131
+
132
+ definitions
133
+ end
134
+
135
+ def extract_examples(block)
136
+ examples = []
137
+
138
+ block.css('.def-body.ddef_b > .examp.dexamp').each_with_index do |example, index|
139
+ examples << {
140
+ id: index,
141
+ text: example.at_css('.eg.deg')&.text,
142
+ translation: example.at_css('.trans.dtrans')&.text
143
+ }
144
+ end
145
+
146
+ examples
147
+ end
148
+ end
149
+
150
+ class API < Sinatra::Base
151
+ register Sinatra::Cors
152
+
153
+ set :allow_origin, "*"
154
+ set :allow_methods, "GET,HEAD,POST"
155
+ set :allow_headers, "content-type,if-modified-since"
156
+ set :expose_headers, "location,link"
157
+
158
+ client = Client.new
159
+
160
+ get '/api/dictionary/:language/:entry' do
161
+ content_type :json
162
+
163
+ result = client.lookup(params[:entry], language: params[:language])
164
+
165
+ if result && !result[:word].empty?
166
+ status 200
167
+ result.to_json
168
+ else
169
+ status 404
170
+ {error: "word not found"}.to_json
171
+ end
172
+ end
173
+ end
174
+ end
metadata ADDED
@@ -0,0 +1,146 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cambridge_dict
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Ben D'Angelo
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2024-10-23 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.15'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.15'
27
+ - !ruby/object:Gem::Dependency
28
+ name: httparty
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.21'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.21'
41
+ - !ruby/object:Gem::Dependency
42
+ name: sinatra
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.1'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.1'
55
+ - !ruby/object:Gem::Dependency
56
+ name: sinatra-cors
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.2'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.2'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '3.2'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '3.2'
83
+ - !ruby/object:Gem::Dependency
84
+ name: vcr
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '6.3'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '6.3'
97
+ - !ruby/object:Gem::Dependency
98
+ name: webmock
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '3.24'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '3.24'
111
+ description: A Ruby gem to interact with the Cambridge Dictionary website
112
+ email: ben@bendangelo.me
113
+ executables: []
114
+ extensions: []
115
+ extra_rdoc_files: []
116
+ files:
117
+ - LICENSE.txt
118
+ - README.md
119
+ - Rakefile
120
+ - config.ru
121
+ - lib/cambridge_dict.rb
122
+ - lib/cambridge_dict/version.rb
123
+ homepage: https://codeberg.org/bendangelo/cambridge_dict
124
+ licenses:
125
+ - MIT
126
+ metadata: {}
127
+ post_install_message:
128
+ rdoc_options: []
129
+ require_paths:
130
+ - lib
131
+ required_ruby_version: !ruby/object:Gem::Requirement
132
+ requirements:
133
+ - - ">="
134
+ - !ruby/object:Gem::Version
135
+ version: '0'
136
+ required_rubygems_version: !ruby/object:Gem::Requirement
137
+ requirements:
138
+ - - ">="
139
+ - !ruby/object:Gem::Version
140
+ version: '0'
141
+ requirements: []
142
+ rubygems_version: 3.5.21
143
+ signing_key:
144
+ specification_version: 4
145
+ summary: Cambridge Dictionary API wrapper
146
+ test_files: []