nebrija 0.2.2 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/nebrija +2 -1
- data/lib/nebrija/parser.rb +57 -36
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4e22c43f3c2a0f893f6e66a2f64337d3c813377a
|
4
|
+
data.tar.gz: 08bb77b1e7d90bf3a9201ec3b76b8c8a2275ec75
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bf5634e8b8e06f50578d525301947f9c728358ace2bed1a46c80e3582563c2f0670004dd8c275a53a3616abf06ef9e29fd4d8f9d4bc49db98591ae917bee56ca
|
7
|
+
data.tar.gz: 7992fbe7cbfee315992699ca37b6a830215fc50e754ff8def65c7fb025cbc511c09da0fc7ff0f8bfdd42cb54eb1f9afd2bcad706f48bc2f71c05be2477fd74a5
|
data/bin/nebrija
CHANGED
data/lib/nebrija/parser.rb
CHANGED
@@ -14,8 +14,8 @@ class Parser
|
|
14
14
|
if valid?
|
15
15
|
{
|
16
16
|
:status => 'success',
|
17
|
-
:type =>
|
18
|
-
:response =>
|
17
|
+
:type => 'single',
|
18
|
+
:response => parse_single
|
19
19
|
}
|
20
20
|
else
|
21
21
|
{
|
@@ -28,46 +28,69 @@ class Parser
|
|
28
28
|
private
|
29
29
|
|
30
30
|
def single?
|
31
|
-
@doc.css('
|
31
|
+
@doc.css('article').length == 1
|
32
32
|
end
|
33
33
|
|
34
34
|
def parse_single
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
35
|
+
response = {
|
36
|
+
:basic_meanings => [],
|
37
|
+
:other_meanings => []
|
38
|
+
}
|
39
|
+
|
40
|
+
response[:word] = @doc.css('header').inner_text.sub('.', '')
|
41
|
+
|
42
|
+
@doc.css('body > div > article > p').each_with_index do |entry, index|
|
43
|
+
if index.zero? # Parsing etymology
|
44
|
+
response[:etymology] = entry.inner_text
|
45
|
+
elsif entry['class'] =~ /j[0-9]*/
|
46
|
+
# Parsing first meaning
|
47
|
+
response[:basic_meanings] << metadata(entry.inner_text)
|
48
|
+
elsif entry['class'] == 'm' || entry['class'] =~/k[0-9]*/
|
49
|
+
# Parsing other meanings
|
50
|
+
# k is the expression with 1 element
|
51
|
+
# m is the meaning with >= elements
|
52
|
+
type = (:meaning if entry['class'] == 'm') || :expression
|
53
|
+
response[:other_meanings] << [type, entry.inner_text]
|
54
|
+
end
|
55
|
+
end
|
55
56
|
|
56
|
-
|
57
|
+
clean! response
|
58
|
+
end
|
59
|
+
|
60
|
+
def clean! response
|
61
|
+
parsed_meanings = []
|
62
|
+
state = :EXPR
|
63
|
+
temp = nil
|
57
64
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
65
|
+
response[:other_meanings].each do |type, text|
|
66
|
+
state = :EXPR if type == :expression
|
67
|
+
if state == :EXPR
|
68
|
+
unless temp.nil?
|
69
|
+
parsed_meanings << temp
|
70
|
+
end
|
71
|
+
temp = {
|
72
|
+
:expression => text,
|
73
|
+
:meanings => []
|
74
|
+
}
|
75
|
+
state = :MEAN
|
76
|
+
elsif state == :MEAN
|
77
|
+
temp[:meanings] << metadata(text)
|
64
78
|
end
|
65
|
-
state = :entry
|
66
79
|
end
|
80
|
+
response[:other_meanings] = parsed_meanings
|
67
81
|
|
68
|
-
|
82
|
+
response
|
69
83
|
end
|
70
84
|
|
85
|
+
def metadata text
|
86
|
+
# To be implemented
|
87
|
+
# The idea would be to split the text in metadata
|
88
|
+
# and real text. It seems quite tricky.
|
89
|
+
{
|
90
|
+
:meaning => text,
|
91
|
+
:meta => nil
|
92
|
+
}
|
93
|
+
end
|
71
94
|
def parse_multiple
|
72
95
|
@doc.css('body > ul > li > a').map do |word|
|
73
96
|
{
|
@@ -78,13 +101,11 @@ class Parser
|
|
78
101
|
end
|
79
102
|
|
80
103
|
def valid?
|
81
|
-
|
82
|
-
valid_body = (@doc.css('body').inner_text =~/No encontrado/).nil?
|
83
|
-
|
84
|
-
valid_title && valid_body && delete_pending?
|
104
|
+
!@doc.css('article').length.zero? # delete_pending?
|
85
105
|
end
|
86
106
|
|
87
107
|
def delete_pending?
|
108
|
+
# TODO: Check
|
88
109
|
tb_deleted = true
|
89
110
|
if !@doc.css('body > div > p').nil? && !@doc.css('body > div > p').first.nil?
|
90
111
|
tb_deleted = (@doc.css('body > div > p').first.inner_text =~/suprimido/).nil?
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nebrija
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- "@javierhonduco"
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-02-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|