nebrija 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile ADDED
@@ -0,0 +1,9 @@
1
+ require 'rake/testtask'
2
+
3
# Register the `test` task: puts the test directory on the load path and runs
# every file matching test/test*.rb.
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push('test')
  test_task.test_files = FileList['test/test*.rb']
end

# Running plain `rake` runs the test suite.
desc 'Run tests'
task default: :test
data/bin/nebrija ADDED
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'json'
4
+ require 'nebrija'
5
+
6
# Command-line entry point: looks up the word given as the first argument and
# prints the parsed result as pretty-printed JSON.
#
# Without an argument there is nothing to look up, so exit with a usage hint
# (on stderr, non-zero status) instead of sending an empty query.
abort 'Usage: nebrija WORD' if ARGV.empty?

puts 'Oh, hai!'
puts 'In future versions this will print something more human readable.'
puts
puts JSON.pretty_generate(HTTPRae.new.search(ARGV[0]))
@@ -0,0 +1,75 @@
1
+ require 'nokogiri'
2
+
3
# Converts the HTML returned by the RAE dictionary search into plain Ruby
# hashes and arrays.
#
# #parse returns one of:
# * { :error => String } when the page title contains "error";
# * { :id => Integer or nil,
#     :data => [{ :word => String,
#                 :meanings => [{ :word => String, :meta => String or nil }] }] }
#   for a page describing a single entry;
# * [{ :word => String, :href => String }, ...] for a page that lists several
#   candidate entries.
class Parser
  # Leading run of "letters, dot, one or two spaces" groups at the start of a
  # definition (presumably grammatical abbreviations such as "m. pl. ").
  # The group is non-capturing on purpose: with a capturing group String#scan
  # returns only the last repetition, which dropped every abbreviation but the
  # final one.
  # NOTE(review): in Ruby's regexp syntax `{1,4}+` is a repeated interval, not a
  # possessive quantifier, so letter runs of any length are accepted — confirm
  # whether that is intended before tightening it.
  META_REGEX = /^(?:[a-zA-Z]{1,4}+\.[ ]{1,2})+/

  # Numbering such as "1. " or "12. "; removed wherever it occurs in a text.
  NUMBERING_REGEX = /[0-9]+\.[ ]/

  # Headword used when a header paragraph carries no text.
  MISSING_HEADWORD = '=>'

  # rae_data - raw HTML (anything responding to #to_s; nil is read as '').
  # word     - the searched term; substituted for "~" inside headwords.
  def initialize(rae_data, word)
    # Drop newlines and collapse runs of spaces before handing off to Nokogiri.
    markup = rae_data.to_s.delete("\n").squeeze(' ')
    @doc = Nokogiri::HTML(markup)
    @word = word
  end

  # Parses the document and returns one of the shapes listed on the class.
  def parse
    return { :error => 'Word does not exist. Sorry.' } unless valid?

    single? ? parse_single : parse_multiple
  end

  # True when the page has no top-level <ul>, i.e. it is not a list of
  # candidate entries.
  def single?
    @doc.css('body > ul').empty?
  end

  private

  # Walks the paragraphs of a single-entry page. A paragraph with class "p"
  # opens a new entry; every other paragraph is a meaning of the latest entry.
  def parse_single
    anchor = @doc.css('body > div > a').first
    entries = []

    @doc.css('body > div > p').each do |paragraph|
      if paragraph['class'] == 'p'
        entries << build_entry(paragraph.css('span').inner_text)
      else
        meaning = build_meaning(paragraph.inner_text)
        next unless meaning

        # A meaning that appears before any header used to crash on nil; file
        # it under a placeholder entry instead of losing it.
        entries << build_entry('') if entries.empty?
        entries.last[:meanings] << meaning
      end
    end

    # The id is nil (rather than an exception) when the page has no anchor.
    { :id => (anchor['name'].to_i if anchor), :data => entries }
  end

  # Builds an empty entry for the given header text.
  def build_entry(headword)
    headword = MISSING_HEADWORD if headword == ''
    # Block form: the searched word is inserted literally, so backslashes in
    # it are not interpreted as back-references.
    expanded = headword.gsub('~') { @word.to_s }
    { :word => expanded.strip.capitalize, :meanings => [] }
  end

  # Turns the text of a definition paragraph into a meaning hash, or returns
  # nil when the paragraph must be skipped (it starts with "(" or nothing is
  # left once numbering and the META_REGEX prefix are removed).
  def build_meaning(raw_text)
    text = raw_text.strip.gsub(NUMBERING_REGEX, '')
    return nil if text.start_with?('(')

    meta = text.scan(META_REGEX).join.strip
    definition = text.gsub(META_REGEX, '')
    return nil if definition.empty?

    { :word => definition, :meta => (meta unless meta.empty?) }
  end

  # Collects the candidate entries of a multi-entry page. Links lacking a
  # <span> label or an href are skipped instead of raising on nil.
  def parse_multiple
    @doc.css('body > ul > li > a').each_with_object([]) do |link, candidates|
      label = link.css('span').first
      href = link['href']
      next unless label && href

      candidates << {
        :word => label.inner_text,
        :href => href.gsub(/search\?id=/, '')
      }
    end
  end

  # False when the page title contains "error".
  def valid?
    @doc.css('title').inner_text !~ /error/
  end
end
data/lib/nebrija.rb ADDED
@@ -0,0 +1,64 @@
1
require 'uri'

require 'nebrija/parser'
require 'typhoeus'
3
+
4
+
5
# Base class for dictionary lookups: obtains the raw markup for a word through
# #query (supplied by subclasses) and hands it to Parser.
class Rae
  # Looks up +word+ and returns whatever Parser#parse produces for the markup
  # that #query returned.
  def search(word)
    Parser.new(query(word), word).parse
  end

  private

  # Returns the raw markup for +word+. Subclasses must override this.
  #
  # Raises NotImplementedError when called on a class that does not override
  # it. The previous code raised a RuntimeError whose message was the string
  # 'NotImplementedError'.
  def query(word)
    raise NotImplementedError, "#{self.class}#query must be implemented by a subclass"
  end
end
16
+
17
+
18
# Rae variant that reads the markup from a local file. The "word" passed to
# #search is used as the path of the file to read.
class FileRae < Rae
  private

  # Returns the contents of the file at +file+.
  # Raises the usual SystemCallError (e.g. Errno::ENOENT) if it cannot be read.
  def query(file)
    # File.read rather than IO.read: IO.read treats a path starting with "|"
    # as a command to run, File.read always opens a file.
    File.read(file)
  end
end
25
+
26
+
27
# Rae variant that fetches the markup from the remote search service at
# SEARCH_URL with an HTTP POST.
class HTTPRae < Rae
  USER_AGENT = 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36'
  SEARCH_URL = 'http://lema.rae.es/drae/srv/search?'
  # Seconds to wait for the remote service before giving up.
  # NOTE(review): the original left the value blank, so the constant was bound
  # to ID_REGEX by the chained assignment and no timeout was ever applied.
  # 10 is a placeholder value — tune as needed.
  REQUEST_TIMEOUT = 10
  # A word containing any digit is treated as an entry id instead of a term.
  ID_REGEX = /[0-9]/

  private

  # Posts the search for +word+ and returns the response body.
  #
  # The word is sent as "val=<word>", or as "id=<word>" when it contains a
  # digit (see #val?). It is URL-escaped so spaces, "&" and non-ASCII letters
  # cannot corrupt the query string.
  #
  # Raises RuntimeError if the request exceeds REQUEST_TIMEOUT seconds.
  def query(word)
    @word = word

    param = val? ? 'val=' : 'id='
    response = Typhoeus::Request.post(
      "#{SEARCH_URL}#{param}#{URI.encode_www_form_component(word.to_s)}",
      body: build_headers,
      headers: { 'User-Agent' => USER_AGENT },
      timeout: REQUEST_TIMEOUT
    )
    raise "Request to #{SEARCH_URL} timed out after #{REQUEST_TIMEOUT}s" if response.timed_out?

    response.body
  end

  # True when the current word contains no digit, i.e. it is searched by
  # value and not by id.
  def val?
    @word.to_s !~ ID_REGEX
  end

  # Builds the form-encoded string that #query sends as the POST body
  # (despite the method name, these are body fields, not HTTP headers).
  # Values are joined verbatim; the 'TS014dfc77_cr' value is already
  # percent-encoded and must not be escaped again.
  def build_headers
    fields = {
      'TS014dfc77_id' => 3,
      'TS014dfc77_cr' => '42612abd48551544c72ae36bc40f440a%3Akkmj%3AQG60Q2v4%3A1477350835',
      'TS014dfc77_76' => 0,
      'TS014dfc77_md' => 1,
      'TS014dfc77_rf' => 0,
      'TS014dfc77_ct' => 0,
      'TS014dfc77_pd' => 0
    }
    fields.map { |key, value| "#{key}=#{value}" }.join('&')
  end
end
@@ -0,0 +1,63 @@
1
+ require 'test/unit'
2
+ require 'nebrija'
3
+
4
# Directory with the canned HTML pages used by the mocked tests. Resolved
# relative to this file so the suite does not depend on the working directory.
MOCKS_DIR = File.expand_path('mocks', File.dirname(__FILE__))
5
+
6
# Smoke tests for the parser, fed through FileRae with the canned pages stored
# under MOCKS_DIR.
class TestMockedParserBasic < Test::Unit::TestCase
  def test_error_basic
    assert_not_nil FileRae.new.search("#{MOCKS_DIR}/error.html")[:error]
  end

  def test_single_basic
    assert_not_nil FileRae.new.search("#{MOCKS_DIR}/single.html")[:data]
  end

  def test_multiple_basic
    # assert_equal reports the actual length when the expectation fails.
    assert_equal 2, FileRae.new.search("#{MOCKS_DIR}/multiple.html").length
  end
end
20
+
21
# Content checks for the parser, fed through FileRae with the canned pages
# stored under MOCKS_DIR.
class TestMockedParserContent < Test::Unit::TestCase
  def test_single_basic
    entries = FileRae.new.search("#{MOCKS_DIR}/single.html")[:data]
    assert_operator entries.length, :>, 20
  end

  def test_multiple_basic
    # Parse the fixture once and check both candidates on the same result.
    candidates = FileRae.new.search("#{MOCKS_DIR}/multiple.html")
    assert_equal 'bancar', candidates[0][:word]
    assert_equal 'banco', candidates[1][:word]
  end
end
32
+
33
+
34
# Smoke tests that go through HTTPRae, i.e. against the remote service.
#
# This class was previously also named TestMockedParserBasic. That reopened the
# mocked test class and replaced its test_error_basic, test_single_basic and
# test_multiple_basic, so the mocked versions never ran.
class TestParserBasic < Test::Unit::TestCase
  def test_single_basic_id
    assert_not_nil HTTPRae.new.search('MHpGWYJ6YDXX2bw9Ghwm')[:data]
  end

  def test_error_basic
    assert_not_nil HTTPRae.new.search('jddhfgsd')[:error]
  end

  def test_single_basic
    assert_not_nil HTTPRae.new.search('a')[:data]
  end

  def test_multiple_basic
    # assert_equal reports the actual length when the expectation fails.
    assert_equal 2, HTTPRae.new.search('banco').length
  end
end
52
+
53
# Content checks that go through HTTPRae, i.e. against the remote service.
class TestParserContent < Test::Unit::TestCase
  def test_single_basic
    entries = HTTPRae.new.search('a')[:data]
    assert_operator entries.length, :>, 4
  end

  def test_multiple_basic
    # One request is enough: both candidates are checked on the same response
    # instead of querying the remote service twice.
    candidates = HTTPRae.new.search('banco')
    assert_equal 'bancar', candidates[0][:word]
    assert_equal 'banco', candidates[1][:word]
  end
end
metadata ADDED
@@ -0,0 +1,100 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: nebrija
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - ! '@javierhonduco'
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-07-11 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: typhoeus
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rake
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ description: A gem to access the rae dictionary
63
+ email: a@a.a
64
+ executables:
65
+ - nebrija
66
+ extensions: []
67
+ extra_rdoc_files: []
68
+ files:
69
+ - Rakefile
70
+ - lib/nebrija.rb
71
+ - lib/nebrija/parser.rb
72
+ - bin/nebrija
73
+ - test/test_basic.rb
74
+ homepage: http://rubygems.org/gems/nebrija
75
+ licenses:
76
+ - MIT
77
+ post_install_message:
78
+ rdoc_options: []
79
+ require_paths:
80
+ - lib
81
+ required_ruby_version: !ruby/object:Gem::Requirement
82
+ none: false
83
+ requirements:
84
+ - - ! '>='
85
+ - !ruby/object:Gem::Version
86
+ version: '0'
87
+ required_rubygems_version: !ruby/object:Gem::Requirement
88
+ none: false
89
+ requirements:
90
+ - - ! '>='
91
+ - !ruby/object:Gem::Version
92
+ version: '0'
93
+ requirements: []
94
+ rubyforge_project:
95
+ rubygems_version: 1.8.23
96
+ signing_key:
97
+ specification_version: 3
98
+ summary: dictionary gem and stuff
99
+ test_files:
100
+ - test/test_basic.rb