nebrija 0.2.2 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +4 -4
  2. data/bin/nebrija +2 -1
  3. data/lib/nebrija/parser.rb +57 -36
  4. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 08639cd57c6f906d92468d3366cf2302cdbf2332
4
- data.tar.gz: 1f288ad92cc4c1b451cfd5ec28737fa435420801
3
+ metadata.gz: 4e22c43f3c2a0f893f6e66a2f64337d3c813377a
4
+ data.tar.gz: 08bb77b1e7d90bf3a9201ec3b76b8c8a2275ec75
5
5
  SHA512:
6
- metadata.gz: 6754e729f784cac1e9d3fe37e22e5f0a81cad4414116ce37a50ba7cc29079609932e2f5ed06051cb1eb48b3e49740284d8e92aa33a7b607c87d3bbe6ae8c3537
7
- data.tar.gz: cd496967b0e218f27acc7c933d33350a5c0fa64793ab52e43f414b0149f2e6c3ee13e74ce6a13f78adc8ce9347d5334707f251453495e04d3f085e6e3bf68937
6
+ metadata.gz: bf5634e8b8e06f50578d525301947f9c728358ace2bed1a46c80e3582563c2f0670004dd8c275a53a3616abf06ef9e29fd4d8f9d4bc49db98591ae917bee56ca
7
+ data.tar.gz: 7992fbe7cbfee315992699ca37b6a830215fc50e754ff8def65c7fb025cbc511c09da0fc7ff0f8bfdd42cb54eb1f9afd2bcad706f48bc2f71c05be2477fd74a5
data/bin/nebrija CHANGED
@@ -3,4 +3,5 @@
3
3
  require 'json'
4
4
  require_relative '../lib/nebrija/cli'
5
5
 
6
- Nebrija::cli(ARGV.first)
6
+ raise NotImplementedError
7
+ #Nebrija::cli(ARGV.first)
@@ -14,8 +14,8 @@ class Parser
14
14
  if valid?
15
15
  {
16
16
  :status => 'success',
17
- :type => single? ? 'single' : 'multiple',
18
- :response => single? ? parse_single : parse_multiple
17
+ :type => 'single',
18
+ :response => parse_single
19
19
  }
20
20
  else
21
21
  {
@@ -28,46 +28,69 @@ class Parser
28
28
  private
29
29
 
30
30
  def single?
31
- @doc.css('body > ul').length.zero?
31
+ @doc.css('article').length == 1
32
32
  end
33
33
 
34
34
  def parse_single
35
- single_data = []
36
- state = :entry # TODO. Improve FSM syntax.
37
- index = -1 # HACK(javierhonduco)
38
-
39
- @doc.css('body > div > p').each do |entry|
40
- if entry['class'] == 'p' and state == :entry
41
- word = entry.css('span').inner_text
42
- word = '=>' if word == ''
43
- single_data << {
44
- :word => word.strip.capitalize,
45
- :meanings => [],
46
- :etymology => nil
47
- }
48
- index+=1
49
- else
50
- text = entry.inner_text.strip.gsub(/[0-9]+\.[ ]/, '')
51
- if text[0] == '('
52
- single_data[index][:etymology] = text
53
- next
54
- end
35
+ response = {
36
+ :basic_meanings => [],
37
+ :other_meanings => []
38
+ }
39
+
40
+ response[:word] = @doc.css('header').inner_text.sub('.', '')
41
+
42
+ @doc.css('body > div > article > p').each_with_index do |entry, index|
43
+ if index.zero? # Parsing etymology
44
+ response[:etymology] = entry.inner_text
45
+ elsif entry['class'] =~ /j[0-9]*/
46
+ # Parsing first meaning
47
+ response[:basic_meanings] << metadata(entry.inner_text)
48
+ elsif entry['class'] == 'm' || entry['class'] =~/k[0-9]*/
49
+ # Parsing other meanings
50
+ # k is the expression with 1 element
51
+ # m is the meaning with >= elements
52
+ type = (:meaning if entry['class'] == 'm') || :expression
53
+ response[:other_meanings] << [type, entry.inner_text]
54
+ end
55
+ end
55
56
 
56
- unparsed_meta = text.scan META_REGEX
57
+ clean! response
58
+ end
59
+
60
+ def clean! response
61
+ parsed_meanings = []
62
+ state = :EXPR
63
+ temp = nil
57
64
 
58
- text = text.gsub(META_REGEX, '')
59
- single_data[index][:meanings] << {
60
- :meaning => text,
61
- :meta => (unparsed_meta.join.strip if unparsed_meta.join.strip != ''),
62
- } if !text.nil? and text != '' and index >= 0
63
- state = :definitions
65
+ response[:other_meanings].each do |type, text|
66
+ state = :EXPR if type == :expression
67
+ if state == :EXPR
68
+ unless temp.nil?
69
+ parsed_meanings << temp
70
+ end
71
+ temp = {
72
+ :expression => text,
73
+ :meanings => []
74
+ }
75
+ state = :MEAN
76
+ elsif state == :MEAN
77
+ temp[:meanings] << metadata(text)
64
78
  end
65
- state = :entry
66
79
  end
80
+ response[:other_meanings] = parsed_meanings
67
81
 
68
- single_data
82
+ response
69
83
  end
70
84
 
85
+ def metadata text
86
+ # To be implemented
87
+ # The idea would be to split the text in metadata
88
+ # and real text. It's seems quite tricky.
89
+ {
90
+ :meaning => text,
91
+ :meta => nil
92
+ }
93
+ end
71
94
  def parse_multiple
72
95
  @doc.css('body > ul > li > a').map do |word|
73
96
  {
@@ -78,13 +101,11 @@ class Parser
78
101
  end
79
102
 
80
103
  def valid?
81
- valid_title = (@doc.css('title').inner_text =~/error/).nil?
82
- valid_body = (@doc.css('body').inner_text =~/No encontrado/).nil?
83
-
84
- valid_title && valid_body && delete_pending?
104
+ !@doc.css('article').length.zero? # delete_pending?
85
105
  end
86
106
 
87
107
  def delete_pending?
108
+ # TODO: Check
88
109
  tb_deleted = true
89
110
  if !@doc.css('body > div > p').nil? && !@doc.css('body > div > p').first.nil?
90
111
  tb_deleted = (@doc.css('body > div > p').first.inner_text =~/suprimido/).nil?
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nebrija
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - "@javierhonduco"
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-22 00:00:00.000000000 Z
11
+ date: 2016-02-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri