nebrija 0.2.2 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/nebrija +2 -1
- data/lib/nebrija/parser.rb +57 -36
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4e22c43f3c2a0f893f6e66a2f64337d3c813377a
|
4
|
+
data.tar.gz: 08bb77b1e7d90bf3a9201ec3b76b8c8a2275ec75
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bf5634e8b8e06f50578d525301947f9c728358ace2bed1a46c80e3582563c2f0670004dd8c275a53a3616abf06ef9e29fd4d8f9d4bc49db98591ae917bee56ca
|
7
|
+
data.tar.gz: 7992fbe7cbfee315992699ca37b6a830215fc50e754ff8def65c7fb025cbc511c09da0fc7ff0f8bfdd42cb54eb1f9afd2bcad706f48bc2f71c05be2477fd74a5
|
data/bin/nebrija
CHANGED
data/lib/nebrija/parser.rb
CHANGED
@@ -14,8 +14,8 @@ class Parser
|
|
14
14
|
if valid?
|
15
15
|
{
|
16
16
|
:status => 'success',
|
17
|
-
:type =>
|
18
|
-
:response =>
|
17
|
+
:type => 'single',
|
18
|
+
:response => parse_single
|
19
19
|
}
|
20
20
|
else
|
21
21
|
{
|
@@ -28,46 +28,69 @@ class Parser
|
|
28
28
|
private
|
29
29
|
|
30
30
|
def single?
|
31
|
-
@doc.css('
|
31
|
+
@doc.css('article').length == 1
|
32
32
|
end
|
33
33
|
|
34
34
|
def parse_single
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
35
|
+
response = {
|
36
|
+
:basic_meanings => [],
|
37
|
+
:other_meanings => []
|
38
|
+
}
|
39
|
+
|
40
|
+
response[:word] = @doc.css('header').inner_text.sub('.', '')
|
41
|
+
|
42
|
+
@doc.css('body > div > article > p').each_with_index do |entry, index|
|
43
|
+
if index.zero? # Parsing etymology
|
44
|
+
response[:etymology] = entry.inner_text
|
45
|
+
elsif entry['class'] =~ /j[0-9]*/
|
46
|
+
# Parsing first meaning
|
47
|
+
response[:basic_meanings] << metadata(entry.inner_text)
|
48
|
+
elsif entry['class'] == 'm' || entry['class'] =~/k[0-9]*/
|
49
|
+
# Parsing other meanings
|
50
|
+
# k is the expression with 1 element
|
51
|
+
# m is the meaning with >= elements
|
52
|
+
type = (:meaning if entry['class'] == 'm') || :expression
|
53
|
+
response[:other_meanings] << [type, entry.inner_text]
|
54
|
+
end
|
55
|
+
end
|
55
56
|
|
56
|
-
|
57
|
+
clean! response
|
58
|
+
end
|
59
|
+
|
60
|
+
def clean! response
|
61
|
+
parsed_meanings = []
|
62
|
+
state = :EXPR
|
63
|
+
temp = nil
|
57
64
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
65
|
+
response[:other_meanings].each do |type, text|
|
66
|
+
state = :EXPR if type == :expression
|
67
|
+
if state == :EXPR
|
68
|
+
unless temp.nil?
|
69
|
+
parsed_meanings << temp
|
70
|
+
end
|
71
|
+
temp = {
|
72
|
+
:expression => text,
|
73
|
+
:meanings => []
|
74
|
+
}
|
75
|
+
state = :MEAN
|
76
|
+
elsif state == :MEAN
|
77
|
+
temp[:meanings] << metadata(text)
|
64
78
|
end
|
65
|
-
state = :entry
|
66
79
|
end
|
80
|
+
response[:other_meanings] = parsed_meanings
|
67
81
|
|
68
|
-
|
82
|
+
response
|
69
83
|
end
|
70
84
|
|
85
|
+
def metadata text
|
86
|
+
# To be implemented
|
87
|
+
# The idea would be to split the text in metadata
|
88
|
+
# and real text. It seems quite tricky.
|
89
|
+
{
|
90
|
+
:meaning => text,
|
91
|
+
:meta => nil
|
92
|
+
}
|
93
|
+
end
|
71
94
|
def parse_multiple
|
72
95
|
@doc.css('body > ul > li > a').map do |word|
|
73
96
|
{
|
@@ -78,13 +101,11 @@ class Parser
|
|
78
101
|
end
|
79
102
|
|
80
103
|
def valid?
|
81
|
-
|
82
|
-
valid_body = (@doc.css('body').inner_text =~/No encontrado/).nil?
|
83
|
-
|
84
|
-
valid_title && valid_body && delete_pending?
|
104
|
+
!@doc.css('article').length.zero? # delete_pending?
|
85
105
|
end
|
86
106
|
|
87
107
|
def delete_pending?
|
108
|
+
# TODO: Check
|
88
109
|
tb_deleted = true
|
89
110
|
if !@doc.css('body > div > p').nil? && !@doc.css('body > div > p').first.nil?
|
90
111
|
tb_deleted = (@doc.css('body > div > p').first.inner_text =~/suprimido/).nil?
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nebrija
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- "@javierhonduco"
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-02-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|