nebrija 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile ADDED
@@ -0,0 +1,9 @@
require 'rake/testtask'

# Configure the Rake test task: put the `test` directory on the load path
# and pick up every file matching test/test*.rb.
Rake::TestTask.new do |test_task|
  test_task.libs.push('test')
  test_task.test_files = FileList['test/test*.rb']
end

# Running plain `rake` runs the test task.
desc 'Run tests'
task default: :test
data/bin/nebrija ADDED
@@ -0,0 +1,9 @@
#!/usr/bin/env ruby

require 'json'
require 'nebrija'

# Command-line entry point: looks up the word (or entry id) given as the
# first argument through HTTPRae and prints the parsed result as JSON.
word = ARGV[0]

# Without an argument there is nothing to look up; fail fast with a usage
# message instead of sending an empty query over the network.
abort 'Usage: nebrija WORD' if word.nil? || word.strip.empty?

puts 'Oh, hai!'
puts 'In future versions this will print something more human readable.'
puts
puts JSON.pretty_generate(HTTPRae.new.search(word))
data/lib/nebrija/parser.rb ADDED
@@ -0,0 +1,75 @@
1
+ require 'nokogiri'
2
+
# Turns the HTML of a dictionary response into plain hashes and arrays.
#
# Three kinds of page are recognised:
# * an error page (title contains "error")  -> {:error => message}
# * a single entry (no top-level <ul>)      -> {:id => Integer, :data => [...]}
# * a disambiguation list (top-level <ul>)  -> [{:word => ..., :href => ...}, ...]
class Parser

  # Leading run of short abbreviations ("tr. ", "adj. ") at the start of a
  # definition: one to four letters, a dot, and one or two spaces, repeated.
  META_REGEX = /^([a-zA-Z]{1,4}+\.[ ]{1,2})+/

  # rae_data - String with the raw HTML of the response.
  # word     - String that was searched; substituted for "~" in entry titles.
  def initialize(rae_data, word)
    # Newlines are removed and runs of spaces collapsed before parsing.
    @doc = Nokogiri::HTML(rae_data
      .gsub(/[\n]+/, '')
      .gsub(/[ ]{2,}+/, ' '))
    @word = word
  end

  # Returns the parsed page in one of the three shapes described on the class.
  def parse
    return {:error => 'Word does not exist. Sorry.'} unless valid?

    if single?
      parse_single
    else
      parse_multiple
    end
  end

  # True when the page has no top-level <ul>, i.e. it is not a list of
  # candidate entries.
  def single?
    @doc.css('body > ul').length.zero?
  end

  private

  # Builds {:id, :data} for a single-entry page. Every <p class="p"> opens a
  # new entry; every other paragraph is a definition of the latest entry.
  #
  # The original kept a `state` variable, but it was reset to :entry at the
  # end of every iteration, so it never influenced the result and is dropped.
  def parse_single
    anchor = @doc.css('body > div > a').first
    data = []
    # A page without the anchor yields a nil id rather than a NoMethodError.
    result = {:id => (anchor['name'].to_i if anchor), :data => data}

    @doc.css('body > div > p').each do |paragraph|
      if paragraph['class'] == 'p'
        data << build_entry(paragraph)
        next
      end

      meaning = build_meaning(paragraph)
      next if meaning.nil?

      # A definition that shows up before any entry header has nowhere to
      # go; skip it instead of crashing on a nil entry.
      current = data.last
      current[:meanings] << meaning if current
    end
    result
  end

  # Builds the hash for an entry header paragraph.
  def build_entry(paragraph)
    title = paragraph.css('span').inner_text
    title = '=>' if title == ''
    {
      :word => title.gsub(/~/, @word.to_s).strip.capitalize,
      :meanings => []
    }
  end

  # Builds the hash for a definition paragraph, or nil when the paragraph is
  # an etymology note (starts with "(") or has no text left.
  def build_meaning(paragraph)
    text = paragraph.inner_text.strip.gsub(/[0-9]+\.[ ]/, '')
    return nil if text.start_with?('(') # Del latín, Nil.

    # Take the whole match. String#scan with the capture group only returns
    # the last repetition, which dropped every abbreviation but the final one.
    meta = text[META_REGEX].to_s.strip
    text = text.gsub(META_REGEX, '')
    return nil if text == ''

    {
      :word => text,
      :meta => (meta unless meta == ''),
    }
  end

  # Builds the list of candidate entries of a disambiguation page.
  def parse_multiple
    @doc.css('body > ul > li > a').map do |link|
      {
        :word => link.css('span').first.inner_text,
        :href => link['href'].gsub(/search\?id=/, '')
      }
    end
  end

  # False when the page title contains "error".
  def valid?
    (@doc.css('title').inner_text =~ /error/).nil?
  end
end
data/lib/nebrija.rb ADDED
@@ -0,0 +1,64 @@
1
+ require 'nebrija/parser'
2
+ require 'typhoeus'
3
+
4
+
# Base class for dictionary backends. Subclasses supply #query, which
# returns the raw HTML for a word; #search hands that HTML to Parser.
class Rae

  # Fetches the raw response for +word+ via #query and returns whatever
  # Parser#parse produces for it.
  def search(word)
    Parser.new(query(word), word).parse
  end

  private

  # Must be overridden by subclasses. Raises NotImplementedError; the
  # original raised a RuntimeError whose message was the string
  # 'NotImplementedError'.
  def query(word)
    raise NotImplementedError, "#{self.class} must implement #query"
  end
end
16
+
17
+
# Backend that reads a saved response from disk; the "word" passed to
# #search is the path of the file to parse.
class FileRae < Rae

  private

  # Returns the contents of +file+ as a String.
  #
  # File.read is used instead of IO.read because IO.read interprets a path
  # beginning with "|" as a command to run, which is never wanted here.
  def query(file)
    File.read(file)
  end
end
25
+
26
+
require 'uri'

# Backend that queries the dictionary web service over HTTP.
class HTTPRae < Rae
  # NOTE(review): USER_AGENT is defined but not sent with the request.
  USER_AGENT = 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36'
  SEARCH_URL = 'http://lema.rae.es/drae/srv/search?'
  # Request timeout in seconds. The original line had no value, so the
  # constant silently became an alias of ID_REGEX.
  # NOTE(review): 10 is a placeholder; confirm the intended value.
  REQUEST_TIMEOUT = 10
  # A search term containing any digit is treated as an entry id.
  ID_REGEX = /[0-9]/

  private

  # Posts the search for +word+ and returns the response body.
  # Terms without digits are sent as `val=`, anything else as `id=`.
  def query(word)
    @word = word

    param = val? ? 'val=' : 'id='
    # Escape the term so spaces and accented letters give a valid URL.
    term = URI.encode_www_form_component(word.to_s)

    response = Typhoeus::Request.post(
      "#{SEARCH_URL}#{param}#{term}",
      body: build_headers,
      timeout: REQUEST_TIMEOUT
    )
    response.body
  end

  # True when the current term contains no digit, i.e. it is a word and
  # not an entry id.
  def val?
    (@word =~ ID_REGEX).nil?
  end

  # Builds the form-encoded body sent with every request.
  # The values are fixed; the `_cr` value is already percent-encoded.
  def build_headers
    {
      'TS014dfc77_id' => 3,
      'TS014dfc77_cr' => '42612abd48551544c72ae36bc40f440a%3Akkmj%3AQG60Q2v4%3A1477350835',
      'TS014dfc77_76' => 0,
      'TS014dfc77_md' => 1,
      'TS014dfc77_rf' => 0,
      'TS014dfc77_ct' => 0,
      'TS014dfc77_pd' => 0
    }.map { |key, value| "#{key}=#{value}" }.join('&')
  end
end
data/test/test_basic.rb ADDED
@@ -0,0 +1,63 @@
1
+ require 'test/unit'
2
+ require 'nebrija'
3
+
# Directory holding the canned HTML responses. Resolved relative to this
# file so the tests do not depend on the directory they are started from.
MOCKS_DIR = File.expand_path('../mocks', __FILE__)
5
+
# Smoke tests for the parser, run against canned HTML files read through
# FileRae (no network access).
class TestMockedParserBasic < Test::Unit::TestCase

  # An error page yields a hash with an :error entry.
  def test_error_basic
    assert_not_nil FileRae.new.search("#{MOCKS_DIR}/error.html")[:error]
  end

  # A single-entry page yields a hash with a :data entry.
  def test_single_basic
    assert_not_nil FileRae.new.search("#{MOCKS_DIR}/single.html")[:data]
  end

  # The disambiguation fixture lists two candidate entries.
  # assert_equal reports the actual length when the check fails.
  def test_multiple_basic
    assert_equal 2, FileRae.new.search("#{MOCKS_DIR}/multiple.html").length
  end
end
20
+
# Content checks for the parser, run against canned HTML files read
# through FileRae (no network access).
class TestMockedParserContent < Test::Unit::TestCase

  # The single-entry fixture contains more than 20 entries.
  def test_single_basic
    entries = FileRae.new.search("#{MOCKS_DIR}/single.html")[:data]
    assert_operator entries.length, :>, 20
  end

  # The disambiguation fixture lists "bancar" then "banco".
  # The fixture is parsed once and both positions are checked on that result.
  def test_multiple_basic
    candidates = FileRae.new.search("#{MOCKS_DIR}/multiple.html")
    assert_equal 'bancar', candidates[0][:word]
    assert_equal 'banco', candidates[1][:word]
  end
end
32
+
33
+
# Smoke tests for the parser against the live service through HTTPRae.
#
# This class was originally also named TestMockedParserBasic, which
# reopened the mocked suite and replaced its same-named tests with these
# network versions. It has its own name so both suites run.
class TestParserBasic < Test::Unit::TestCase

  # Looking up an entry id yields a hash with a :data entry.
  def test_single_basic_id
    assert_not_nil HTTPRae.new.search('MHpGWYJ6YDXX2bw9Ghwm')[:data]
  end

  # An unknown word yields a hash with an :error entry.
  def test_error_basic
    assert_not_nil HTTPRae.new.search('jddhfgsd')[:error]
  end

  # A word with a single entry yields a hash with a :data entry.
  def test_single_basic
    assert_not_nil HTTPRae.new.search('a')[:data]
  end

  # "banco" is expected to return two candidate entries.
  def test_multiple_basic
    assert_equal 2, HTTPRae.new.search('banco').length
  end
end
52
+
# Content checks for the parser against the live service through HTTPRae.
class TestParserContent < Test::Unit::TestCase

  # The entry for "a" contains more than 4 entries.
  def test_single_basic
    entries = HTTPRae.new.search('a')[:data]
    assert_operator entries.length, :>, 4
  end

  # "banco" is expected to list "bancar" then "banco".
  # One request is made and both positions are checked on that result.
  def test_multiple_basic
    candidates = HTTPRae.new.search('banco')
    assert_equal 'bancar', candidates[0][:word]
    assert_equal 'banco', candidates[1][:word]
  end
end
metadata ADDED
@@ -0,0 +1,100 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: nebrija
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - ! '@javierhonduco'
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-07-11 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: typhoeus
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rake
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ description: A gem to access the rae dictionary
63
+ email: a@a.a
64
+ executables:
65
+ - nebrija
66
+ extensions: []
67
+ extra_rdoc_files: []
68
+ files:
69
+ - Rakefile
70
+ - lib/nebrija.rb
71
+ - lib/nebrija/parser.rb
72
+ - bin/nebrija
73
+ - test/test_basic.rb
74
+ homepage: http://rubygems.org/gems/nebrija
75
+ licenses:
76
+ - MIT
77
+ post_install_message:
78
+ rdoc_options: []
79
+ require_paths:
80
+ - lib
81
+ required_ruby_version: !ruby/object:Gem::Requirement
82
+ none: false
83
+ requirements:
84
+ - - ! '>='
85
+ - !ruby/object:Gem::Version
86
+ version: '0'
87
+ required_rubygems_version: !ruby/object:Gem::Requirement
88
+ none: false
89
+ requirements:
90
+ - - ! '>='
91
+ - !ruby/object:Gem::Version
92
+ version: '0'
93
+ requirements: []
94
+ rubyforge_project:
95
+ rubygems_version: 1.8.23
96
+ signing_key:
97
+ specification_version: 3
98
+ summary: dictionary gem and stuff
99
+ test_files:
100
+ - test/test_basic.rb