synonym_scrapper 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 2563a462cf460be24bedbe5fbfd013b87960b0ccaea6d8e57184127542eaadeb
4
+ data.tar.gz: d47a0235d4f94992cfb3b7821fcd73e8b23f54b444465913fa84df2503f0a0f6
5
+ SHA512:
6
+ metadata.gz: f8f50edf8dea422951e5c64cb30b40b3f3a9edf1e442003428676314ba3cded9fecdbebb8ff0f35cca52605f9715b86057eaf0facd0b076540f98ae5bccd4418
7
+ data.tar.gz: 24d1ae426c3ece1fde1db0a0bdea84f0780a24a7b9bef1b478e54a0c5347c8e2d679b22ce353856baaaec31d995e610c86608b4cfd719f643459d21411cf5ce8
@@ -0,0 +1,8 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
@@ -0,0 +1,6 @@
1
+ ---
2
+ language: ruby
3
+ cache: bundler
4
+ rvm:
5
+ - 2.7.1
6
+ before_install: gem install bundler -v 2.1.4
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in synonym_scrapper.gemspec
4
+ gemspec
5
+
6
+ gem "rake", "~> 12.0"
7
+ gem "minitest", "~> 5.0"
8
+ gem "nokogiri", "~> 1.10"
@@ -0,0 +1,26 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ synonym_scrapper (0.1.0)
5
+ nokogiri (~> 1.10)
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ mini_portile2 (2.4.0)
11
+ minitest (5.14.2)
12
+ nokogiri (1.10.10)
13
+ mini_portile2 (~> 2.4.0)
14
+ rake (12.3.3)
15
+
16
+ PLATFORMS
17
+ ruby
18
+
19
+ DEPENDENCIES
20
+ minitest (~> 5.0)
21
+ nokogiri (~> 1.10)
22
+ rake (~> 12.0)
23
+ synonym_scrapper!
24
+
25
+ BUNDLED WITH
26
+ 2.1.4
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2020 Nicolás Mariángel
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,74 @@
1
+ # SynonymScrapper
2
+
3
+ Synonym Scrapper is a Ruby gem that obtains Spanish synonyms from various sources. Currently three synonym sources are supported:
4
+ * Datamuse API ([link](https://www.datamuse.com/api/))
5
+ * Educalingo dictionary ([link](https://educalingo.com/en/dic-es))
6
+ * Natural Language Toolkit ([link](https://www.nltk.org/))
7
+
8
+ ## Installation
9
+
10
+ Add this line to your application's Gemfile:
11
+
12
+ ```ruby
13
+ gem 'synonym_scrapper'
14
+ ```
15
+
16
+ And then execute:
17
+
18
+ $ bundle install
19
+
20
+ Or install it yourself as:
21
+
22
+ $ gem install synonym_scrapper
23
+
24
+ ### Using NLTK
25
+
26
+ First you need to have Python3 installed on the machine that will use this gem. Then you need to install NLTK:
27
+
28
+ $ pip3 install nltk
29
+
30
+ And download its data through python (you can run these through the python interpreter):
31
+
32
+ ```python
33
+ import nltk
34
+ nltk.download()
35
+ ```
36
+ From the packages available in NLTK you'll need to download wordnet and omw (Open Multilingual Wordnet).
37
+
38
+ ## Usage
39
+
40
+ To use this gem you first need to require it:
41
+
42
+ ```ruby
43
+ require 'synonym_scrapper'
44
+ ```
45
+
46
+ Then you need to create a SynonymScrapper instance and request synonyms from it using one of the dictionaries available. Example:
47
+
48
+ ```ruby
49
+ scrapper = SynonymScrapper::SynonymScrapper.new
50
+
51
+ scrapper.synonyms("entretenimiento", :datamuse)
52
+ scrapper.synonyms("saltar", :educalingo)
53
+ scrapper.synonyms("auto", :nltk)
54
+ ```
55
+
56
+ Data obtained is an array of hashes containing the keys `:word` and `:score`.
57
+
58
+ ## Development
59
+
60
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
61
+
62
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
63
+
64
+ ## To do
65
+
66
+ - [ ] Implement more synonym sources
67
+ - [ ] Use datamuse's API full capabilities
68
+ - [ ] Filter data obtained from dictionaries by score
69
+ - [ ] Add method to get synonyms from all dictionaries
70
+ - [ ] Extend to more languages
71
+
72
+ ## License
73
+
74
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
@@ -0,0 +1,10 @@
1
+ require "bundler/gem_tasks"
2
+ require "rake/testtask"
3
+
4
# Defines the `test` task: puts "test" and "lib" on the load path and runs
# every file matching test/**/*_test.rb.
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push("test", "lib")
  test_task.test_files = FileList["test/**/*_test.rb"]
end

# Running plain `rake` runs the test suite.
task default: :test
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "synonym_scrapper"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,83 @@
1
+ require "synonym_scrapper/version"
2
+
3
+ require 'synonym_scrapper/datamuse'
4
+ require 'synonym_scrapper/educalingo'
5
+ require 'synonym_scrapper/nltk'
6
+
7
module SynonymScrapper

  ##
  # Exception thrown by any synonym_scrapper error.

  class Error < StandardError; end

  ##
  # Exception thrown when a wrong dictionary name is used.

  class DictionaryNotAvailable < Error
    def initialize(dictionary = "")
      super("The dictionary named #{dictionary} does not exist in the available dictionaries list")
    end
  end

  ##
  # Exception thrown when the word is not a string.

  class WordFormatError < Error
    def initialize(word = "")
      super("The word #{word} is not a string, it must be a string.")
    end
  end

  ##
  # SynonymScrapper holds the synonym sources to be used and allows making
  # requests to each of these dictionaries by their name.

  class SynonymScrapper

    ##
    # Hash with all dictionaries that can be used. Stored in a class variable.
    #
    # All dictionaries implement the synonyms method.

    @@synonym_dictionaries = {
      Datamuse: Datamuse.new,
      Educalingo: Educalingo.new,
      Nltk: Nltk.new
    }

    ##
    # Getter for the +synonym_dictionaries+ class variable

    def synonym_dictionaries
      @@synonym_dictionaries
    end

    ##
    # Request the synonyms of a +word+ from the selected +dictionary+.
    #
    # +dictionary+ may be a Symbol or a String in any letter case
    # (+:datamuse+, +"Datamuse"+, +:NLTK+ ...). A request to all dictionaries
    # available can be made by iterating over the keys returned by
    # +synonym_dictionaries+.
    #
    # A DictionaryNotAvailable is raised if a wrong +dictionary+ is given
    # (including +nil+).
    # A WordFormatError is raised if +word+ is not a string.

    def synonyms(word, dictionary)
      key = dictionary_key(dictionary)
      raise DictionaryNotAvailable, dictionary if key.nil?
      raise WordFormatError, word unless word.is_a?(String)

      synonym_dictionaries[key].synonyms(word)
    end

    ##
    # Checks if the given +dictionary+ name matches a key in the
    # +synonym_dictionaries+ class variable

    def dictionary_exists?(dictionary)
      !dictionary_key(dictionary).nil?
    end

    private

    ##
    # Returns the key of +synonym_dictionaries+ matching +dictionary+, or nil.
    #
    # The comparison is done on strings so that user input is never turned
    # into a Symbol and values without +capitalize+ (such as nil) simply
    # match nothing instead of raising NoMethodError.

    def dictionary_key(dictionary)
      wanted = dictionary.to_s.capitalize
      synonym_dictionaries.keys.find { |key| key.to_s == wanted }
    end
  end
end
@@ -0,0 +1,42 @@
1
+ require 'json'
2
+ require 'synonym_scrapper/scrapper'
3
+
4
module SynonymScrapper

  ##
  # Scrapper for datamuse's API

  class Datamuse < Scrapper

    ##
    # Initialize the parent Scrapper Class

    def initialize
      super(3, "https://api.datamuse.com/words?v=es&max=40&ml=")
    end

    ##
    # Build the url to be called using this class' +base_url+ and a +word+.
    # Returns an url to where +word+'s synonyms can be obtained.
    #
    # The word is form-encoded because it is placed in the query string;
    # without it URI.parse rejects non-ASCII words such as "español".

    def build_call_url(word)
      URI.parse(base_url + URI.encode_www_form_component(word))
    end

    ##
    # Obtain synonyms of a +word+ from Datamuse.
    #
    # Returns an array of hashes with the keys +:word+ and +:score+. An empty
    # array is returned when the request produced no readable response
    # (+call+ hands back +[]+ on a 404 and +nil+ on other failures).

    def synonyms(word, options = {})
      response = call(word)
      return [] unless response.respond_to?(:read)

      JSON.parse(response.read).map do |synonym|
        { word: synonym["word"], score: synonym["score"] }
      end
    end
  end
end
@@ -0,0 +1,48 @@
1
+ require 'nokogiri'
2
+ require 'synonym_scrapper/scrapper'
3
+
4
module SynonymScrapper

  ##
  # Scrapper for Educalingo's website

  class Educalingo < Scrapper

    ##
    # Minimum score a word needs to be reported. Educalingo tends to list
    # completely unrelated words around and below this score.
    MINIMUM_SCORE = 75

    ##
    # Initialize the parent Scrapper Class

    def initialize
      super(5, "https://educalingo.com/en/dic-es/")
    end

    ##
    # Build the url to be called using this class' +base_url+ and a +word+.
    # Returns an url to where +word+'s synonyms can be obtained.
    #
    # The word becomes a path segment, so it is percent-encoded (spaces as
    # %20); without it URI.parse rejects non-ASCII words such as "niño".

    def build_call_url(word)
      URI.parse(base_url + URI.encode_www_form_component(word).gsub('+', '%20'))
    end

    ##
    # Obtain synonyms of a +word+ from Educalingo.
    #
    # Every +span+ directly under +#wordcloud1+ is treated as a candidate; the
    # first attribute value of the span is read as its score. Candidates whose
    # score is missing, not an integer or below MINIMUM_SCORE are skipped.
    #
    # Returns an array of hashes with the keys +:word+ and +:score+. An empty
    # array is returned when the request produced no readable response
    # (+call+ hands back +[]+ on a 404 and +nil+ on other failures).

    def synonyms(word, options = {})
      response = call(word)
      return [] unless response.respond_to?(:read)

      doc = Nokogiri.HTML(response)
      doc.css('#wordcloud1 > span').each_with_object([]) do |node, found|
        score = parse_score(node.values[0])
        next if score.nil? || score < MINIMUM_SCORE

        found.push({ word: node.inner_html, score: score })
      end
    end

    private

    ##
    # Converts +raw+ to an Integer, or returns nil when it is not one, so a
    # single malformed span does not abort the whole scrape.

    def parse_score(raw)
      Integer(raw)
    rescue ArgumentError, TypeError
      nil
    end
  end
end
@@ -0,0 +1,36 @@
1
+ require 'json'
2
+
3
module SynonymScrapper

  ##
  # Connector and requester of python's NLTK

  class Nltk

    ##
    # Obtain synonyms of a +word+ from the NLTK.
    #
    # Runs +nltk_parser.py+ (located next to this file) with +python3+ and
    # parses the JSON it prints. The command is given to IO.popen as an
    # argument array, so no shell is involved and +word+ can never be
    # interpreted as shell syntax.
    #
    # Returns an array of hashes with the keys +:word+ and +:score+. Any
    # failure (python3 missing, script error, malformed output) is reported on
    # standard error and an empty array is returned.

    def synonyms(word, options = {})
      script = File.join(__dir__, 'nltk_parser.py')
      nltk_response = IO.popen(['python3', script, word], &:read)
      related_words = JSON.parse(nltk_response).dig('relations', word)

      # The script may not list the word at all; treat that as "no synonyms".
      Array(related_words).map do |synonym|
        { word: synonym['word'], score: synonym['score'] }
      end
    rescue StandardError => e
      warn e
      []
    end

  end
end
@@ -0,0 +1,114 @@
1
+ # This script was created using the following tools:
2
+ # - Python3
3
+ # - nltk (Natural Language Toolkit), installed using pip3
4
+ # - From the nltk, wordnet and omw (Open Multilingual Wordnet)
5
+
6
+ from nltk.corpus import wordnet as wn
7
+ import json
8
+ import sys
9
+
10
+ # Everything that can be obtained from the WordNet interface:
11
+ # - Synonyms: words contained within the same WordNet synset (example: ocean → sea)
12
+ # - Hypernyms: "Kind of" (example: gondola → boat)
13
+ # - Hyponyms: "More general than" (example: boat → gondola)
14
+ # - Holonyms: "Part of" (example: accelerator → car)
15
+ # - Meronyms: "Comprises" (example: tree → trunk)
16
+ # - pertainym: "Of or pertaining to"
17
+ # - Derivationally related forms: Terms in different syntactic categories that have the same root form and are semantically related.
18
+
19
+ # Configuration
20
+ langKey = "spa"
21
+ withTypeOfRelation = False
22
+
23
+ # Function definitions
24
+
25
def synonyms(synList):
    """Collect the single-word lemma names of the given synsets.

    Each element of ``synList`` is asked for its lemmas in the configured
    language (``langKey``); names containing an underscore (multi-word
    expressions) are left out.

    Input: iterable of Synset elements
    Output: set of strings containing the translated words
    """
    return {
        lemma.name()
        for synset in synList
        for lemma in synset.lemmas(lang=langKey)
        if '_' not in lemma.name()
    }
37
+
38
def get_related_lemmas(lemmaList):
    """Obtain the related words of a lemma list in the configured language.

    Each lemma is first mapped to its language-agnostic synset and then back
    to the lemmas of that synset in ``langKey``. Only single-word names (no
    underscore) are kept.

    Input: iterable of Lemma elements
    Output: set of strings containing the related translated words
    """
    relatedLemmas = set()
    for lemma in lemmaList:
        for langLemma in lemma.synset().lemmas(lang=langKey):
            name = langLemma.name()
            if '_' not in name:
                # Keep the translated name; adding lemma.name() here would
                # return the untranslated source lemma instead.
                relatedLemmas.add(name)
    return relatedLemmas
51
+
52
def format_synonyms(wordList, relationType):
    """Wrap every word of ``wordList`` in a result record.

    Each record holds the word itself, the given ``relationType`` and a
    fixed score of 100. Returns the records as a list, in iteration order.
    """
    return [
        {'word': entry, 'relation': relationType, 'score': 100}
        for entry in wordList
    ]
62
+
63
# Read input and initialize variables
if len(sys.argv) < 2:
    # Fail with a readable message instead of an IndexError traceback.
    sys.exit("usage: nltk_parser.py <word | path-to-word-list-file>")

argumentText = sys.argv[1]
if '.' in argumentText and argumentText[-1] != '.':
    # Then a file is going to be read to get the list of words (one per line).
    # The context manager closes the file even if reading fails.
    with open(argumentText, 'r') as wordFile:
        wordList = wordFile.read().split('\n')
else:
    # Otherwise assume the input is a single word to be processed
    wordList = [argumentText]

wordRelations = {}
newWordList = set()

# Process each word and collect its relations
for word in wordList:
    lemmas = wn.lemmas(word, lang=langKey)
    synsets = wn.synsets(word, lang=langKey)

    relatedWords = []
    relatedWords += format_synonyms(synonyms(synsets), 'synonym')

    for synset in synsets:
        relatedWords += format_synonyms(synonyms(synset.hyponyms()), "hyponym")
        relatedWords += format_synonyms(synonyms(synset.hypernyms()), "hypernym")
        relatedWords += format_synonyms(synonyms(synset.member_holonyms()), "holonym")
        relatedWords += format_synonyms(synonyms(synset.part_meronyms()), "meronym")
        relatedWords += format_synonyms(synonyms(synset.substance_meronyms()), "meronym")
    for lemma in lemmas:
        relatedWords += format_synonyms(get_related_lemmas(lemma.derivationally_related_forms()), "derivationally")
        relatedWords += format_synonyms(get_related_lemmas(lemma.pertainyms()), "pertainym")

    wordRelations[word] = relatedWords

    newWordList.add(word)
    for relatedWord in relatedWords:
        newWordList.add(relatedWord['word'])

# Build output as JSON: the sorted list of every word seen plus, per input
# word, its relation records.
jsonData = {
    'words': sorted(newWordList),
    'relations': {}
}
for key, value in wordRelations.items():
    # Round-trip through frozensets of items to drop duplicate records.
    jsonData['relations'][key] = list(map(dict, frozenset(frozenset(i.items()) for i in value)))
print(json.dumps(jsonData))
114
+
@@ -0,0 +1,95 @@
1
+ require 'open-uri'
2
+
3
module SynonymScrapper

  ##
  # Base scrapper used to scrape APIs/websites.
  #
  # Subclasses provide +build_call_url+; +call+ performs the request through
  # open-uri with a randomly picked User-Agent header and retries failed
  # requests.

  class Scrapper

    ##
    # List of user agents to select from when scraping.
    USER_AGENTS = [
      'Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko',
      'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:24.0) Gecko/20100101 Firefox/24.0',
      'Mozilla/5.0 (Windows; U; Win 9x 4.90; SG; rv:1.9.2.4) Gecko/20101104 Netscape/9.1.0285',
      'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0',
      'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36',
      'Mozilla/5.0 (Windows; U; Windows NT 6.1; rv:2.2) Gecko/20110201',
      'Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14',
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/537.13+ (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2',
      'Mozilla/5.0 (iPhone; U; ru; CPU iPhone OS 4_2_1 like Mac OS X; ru) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148a Safari/6533.18.5',
      'Mozilla/5.0 (Linux; U; Android 2.3.4; fr-fr; HTC Desire Build/GRJ22) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1',
      'Mozilla/5.0 (BlackBerry; U; BlackBerry 9900; en) AppleWebKit/534.11+ (KHTML, like Gecko) Version/7.1.0.346 Mobile Safari/534.11+',
      'Mozilla/5.0 (Linux; U; Android 2.2; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1',
    ].freeze

    ##
    # Number, denotes the maximum number of retries to do for each failed request.
    attr_accessor :max_retries

    ##
    # Number, denotes how many more retries will be done for a request.
    attr_accessor :retries_left

    ##
    # Base url of the API/website to be consulted.
    attr_accessor :base_url

    ##
    # Initialize the scrapper with the +base_url+ to scrape and the maximum
    # number of retries, +max_retries+

    def initialize(max_retries, base_url)
      @max_retries = max_retries
      @retries_left = max_retries
      @base_url = base_url
    end

    ##
    # Method to be overwritten by classes that inherit from this one
    # endpoint can be anything [Array, Hash, String, etc] as long as
    # it is used consistently in the child class.

    def build_call_url(endpoint)
      raise Error, "This method must be redefined in subclasses"
    end

    ##
    # Executes a call to the given +endpoint+ and returns its response.
    #
    # In case of HTTP Error, method will retry up to +@max_retries+ times and
    # return the response of the first attempt that succeeds.
    # In case of a 404 response, then it will be assumed that retrying
    # would be useless and an empty array is returned.
    # No retrying is done for other types of errors.
    #
    # Returns +nil+ when every retry failed or a non-HTTP error occurred.
    # Errors raised by +build_call_url+ are not rescued. Rescued errors are
    # reported on standard error.

    def call(endpoint)
      uri = build_call_url(endpoint)
      begin
        URI.open(uri, 'User-Agent' => USER_AGENTS.sample)
      rescue OpenURI::HTTPError => e
        warn e
        # open-uri formats the message as "<status code> <reason phrase>".
        return [] if e.message.start_with?('404')

        # Hand the retry's outcome back to the caller; dropping it would make
        # a successful retry look like a failure.
        @retries_left.positive? ? retry_call(endpoint) : nil
      rescue StandardError => e
        warn e
        nil
      ensure
        # Reset the retries_left variable on this instance after each request
        @retries_left = @max_retries
      end
    end

    ##
    # Retry the call to the +endpoint+ specified after a waiting between
    # 50 and 1000 milliseconds (random sleep).
    #
    # Returns whatever the retried +call+ returns.

    def retry_call(endpoint)
      @retries_left -= 1

      # Float division: integer division would always yield 0 seconds.
      sleep(rand(50..1000) / 1000.0)

      call(endpoint)
    end
  end
end
@@ -0,0 +1,3 @@
1
module SynonymScrapper
  # Version of the synonym_scrapper gem; read by the gemspec.
  # Frozen so the shared constant cannot be mutated in place.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,37 @@
1
+ require_relative 'lib/synonym_scrapper/version'
2
+
3
# Gem specification for synonym_scrapper.
Gem::Specification.new do |spec|
  spec.name          = "synonym_scrapper"
  spec.version       = SynonymScrapper::VERSION
  spec.authors       = ["Nicolás Mariángel"]
  spec.email         = ["nicolas.mariangel@usach.cl"]

  spec.summary       = %q{Synonym scrapper for spanish words / Scrapper de sinónimos para palabras en español.}
  # Squiggly heredoc so the source indentation is not published as part of
  # the description.
  spec.description   = <<~EOF
    English: Gem that scrapes spanish synonyms from various sources.
    Currently three sources are supported: Datamuse, Educalingo and NLTK.

    Spanish: Gema que permite consultar por sinonimos en español desde
    los sitios: Datamuse, Educalingo y desde la interfaz nltk de python.
  EOF
  spec.homepage      = "https://rubygems.org/gems/synonym_scrapper"
  spec.license       = "MIT"
  # The library calls URI.open, which open-uri only provides from Ruby 2.5.
  spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0")

  spec.requirements << 'Python3 installed along NLTK with omw'

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = "https://github.com/Uriyah3/ruby_synonym_scrapper"
  #spec.metadata["changelog_uri"] = "TODO: Put your gem's CHANGELOG.md URL here."

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files         = Dir.chdir(File.expand_path('..', __FILE__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_dependency "nokogiri", "~> 1.10"
end
metadata ADDED
@@ -0,0 +1,81 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: synonym_scrapper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Nicolás Mariángel
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2020-10-19 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.10'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.10'
27
+ description: " English: Gem that scrapes spanish synonyms from various sources.\n
28
+ \ Currently three sources are supported: Datamuse, Educalingo y NLTK.\n\n Spanish:
29
+ Gema que permite consultar por sinonimos en español desde \n los sitios: Datamuse,
30
+ Educalingo y desde la interfaz nltk de python.\n"
31
+ email:
32
+ - nicolas.mariangel@usach.cl
33
+ executables: []
34
+ extensions: []
35
+ extra_rdoc_files: []
36
+ files:
37
+ - ".gitignore"
38
+ - ".travis.yml"
39
+ - Gemfile
40
+ - Gemfile.lock
41
+ - LICENSE.txt
42
+ - README.md
43
+ - Rakefile
44
+ - bin/console
45
+ - bin/setup
46
+ - lib/synonym_scrapper.rb
47
+ - lib/synonym_scrapper/datamuse.rb
48
+ - lib/synonym_scrapper/educalingo.rb
49
+ - lib/synonym_scrapper/nltk.rb
50
+ - lib/synonym_scrapper/nltk_parser.py
51
+ - lib/synonym_scrapper/scrapper.rb
52
+ - lib/synonym_scrapper/version.rb
53
+ - synonym_scrapper.gemspec
54
+ homepage: https://rubygems.org/gems/synonym_scrapper
55
+ licenses:
56
+ - MIT
57
+ metadata:
58
+ homepage_uri: https://rubygems.org/gems/synonym_scrapper
59
+ source_code_uri: https://github.com/Uriyah3/ruby_synonym_scrapper
60
+ post_install_message:
61
+ rdoc_options: []
62
+ require_paths:
63
+ - lib
64
+ required_ruby_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: 2.3.0
69
+ required_rubygems_version: !ruby/object:Gem::Requirement
70
+ requirements:
71
+ - - ">="
72
+ - !ruby/object:Gem::Version
73
+ version: '0'
74
+ requirements:
75
+ - Python3 installed along NLTK with omw
76
+ rubygems_version: 3.1.2
77
+ signing_key:
78
+ specification_version: 4
79
+ summary: Synonym scrapper for spanish words / Scrapper de sinónimos para palabras
80
+ en español.
81
+ test_files: []