nebrija 0.2.2 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/bin/nebrija +2 -1
  3. data/lib/nebrija/parser.rb +57 -36
  4. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 08639cd57c6f906d92468d3366cf2302cdbf2332
4
- data.tar.gz: 1f288ad92cc4c1b451cfd5ec28737fa435420801
3
+ metadata.gz: 4e22c43f3c2a0f893f6e66a2f64337d3c813377a
4
+ data.tar.gz: 08bb77b1e7d90bf3a9201ec3b76b8c8a2275ec75
5
5
  SHA512:
6
- metadata.gz: 6754e729f784cac1e9d3fe37e22e5f0a81cad4414116ce37a50ba7cc29079609932e2f5ed06051cb1eb48b3e49740284d8e92aa33a7b607c87d3bbe6ae8c3537
7
- data.tar.gz: cd496967b0e218f27acc7c933d33350a5c0fa64793ab52e43f414b0149f2e6c3ee13e74ce6a13f78adc8ce9347d5334707f251453495e04d3f085e6e3bf68937
6
+ metadata.gz: bf5634e8b8e06f50578d525301947f9c728358ace2bed1a46c80e3582563c2f0670004dd8c275a53a3616abf06ef9e29fd4d8f9d4bc49db98591ae917bee56ca
7
+ data.tar.gz: 7992fbe7cbfee315992699ca37b6a830215fc50e754ff8def65c7fb025cbc511c09da0fc7ff0f8bfdd42cb54eb1f9afd2bcad706f48bc2f71c05be2477fd74a5
data/bin/nebrija CHANGED
@@ -3,4 +3,5 @@
3
3
  require 'json'
4
4
  require_relative '../lib/nebrija/cli'
5
5
 
6
- Nebrija::cli(ARGV.first)
6
+ raise NotImplementedError
7
+ #Nebrija::cli(ARGV.first)
@@ -14,8 +14,8 @@ class Parser
14
14
  if valid?
15
15
  {
16
16
  :status => 'success',
17
- :type => single? ? 'single' : 'multiple',
18
- :response => single? ? parse_single : parse_multiple
17
+ :type => 'single',
18
+ :response => parse_single
19
19
  }
20
20
  else
21
21
  {
@@ -28,46 +28,69 @@ class Parser
28
28
  private
29
29
 
30
30
  def single?
31
- @doc.css('body > ul').length.zero?
31
+ @doc.css('article').length == 1
32
32
  end
33
33
 
34
34
  def parse_single
35
- single_data = []
36
- state = :entry # TODO. Improve FSM syntax.
37
- index = -1 # HACK(javierhonduco)
38
-
39
- @doc.css('body > div > p').each do |entry|
40
- if entry['class'] == 'p' and state == :entry
41
- word = entry.css('span').inner_text
42
- word = '=>' if word == ''
43
- single_data << {
44
- :word => word.strip.capitalize,
45
- :meanings => [],
46
- :etymology => nil
47
- }
48
- index+=1
49
- else
50
- text = entry.inner_text.strip.gsub(/[0-9]+\.[ ]/, '')
51
- if text[0] == '('
52
- single_data[index][:etymology] = text
53
- next
54
- end
35
+ response = {
36
+ :basic_meanings => [],
37
+ :other_meanings => []
38
+ }
39
+
40
+ response[:word] = @doc.css('header').inner_text.sub('.', '')
41
+
42
+ @doc.css('body > div > article > p').each_with_index do |entry, index|
43
+ if index.zero? # Parsing etymology
44
+ response[:etymology] = entry.inner_text
45
+ elsif entry['class'] =~ /j[0-9]*/
46
+ # Parsing first meaning
47
+ response[:basic_meanings] << metadata(entry.inner_text)
48
+ elsif entry['class'] == 'm' || entry['class'] =~/k[0-9]*/
49
+ # Parsing other meanings
50
+ # k is the expression with 1 element
51
+ # m is the meaning with >= elements
52
+ type = (:meaning if entry['class'] == 'm') || :expression
53
+ response[:other_meanings] << [type, entry.inner_text]
54
+ end
55
+ end
55
56
 
56
- unparsed_meta = text.scan META_REGEX
57
+ clean! response
58
+ end
59
+
60
+ def clean! response
61
+ parsed_meanings = []
62
+ state = :EXPR
63
+ temp = nil
57
64
 
58
- text = text.gsub(META_REGEX, '')
59
- single_data[index][:meanings] << {
60
- :meaning => text,
61
- :meta => (unparsed_meta.join.strip if unparsed_meta.join.strip != ''),
62
- } if !text.nil? and text != '' and index >= 0
63
- state = :definitions
65
+ response[:other_meanings].each do |type, text|
66
+ state = :EXPR if type == :expression
67
+ if state == :EXPR
68
+ unless temp.nil?
69
+ parsed_meanings << temp
70
+ end
71
+ temp = {
72
+ :expression => text,
73
+ :meanings => []
74
+ }
75
+ state = :MEAN
76
+ elsif state == :MEAN
77
+ temp[:meanings] << metadata(text)
64
78
  end
65
- state = :entry
66
79
  end
80
+ response[:other_meanings] = parsed_meanings
67
81
 
68
- single_data
82
+ response
69
83
  end
70
84
 
85
+ def metadata text
86
+ # To be implemented
87
+ # The idea would be to split the text in metadata
88
+ # and real text. It's seems quite tricky.
89
+ {
90
+ :meaning => text,
91
+ :meta => nil
92
+ }
93
+ end
71
94
  def parse_multiple
72
95
  @doc.css('body > ul > li > a').map do |word|
73
96
  {
@@ -78,13 +101,11 @@ class Parser
78
101
  end
79
102
 
80
103
  def valid?
81
- valid_title = (@doc.css('title').inner_text =~/error/).nil?
82
- valid_body = (@doc.css('body').inner_text =~/No encontrado/).nil?
83
-
84
- valid_title && valid_body && delete_pending?
104
+ !@doc.css('article').length.zero? # delete_pending?
85
105
  end
86
106
 
87
107
  def delete_pending?
108
+ # TODO: Check
88
109
  tb_deleted = true
89
110
  if !@doc.css('body > div > p').nil? && !@doc.css('body > div > p').first.nil?
90
111
  tb_deleted = (@doc.css('body > div > p').first.inner_text =~/suprimido/).nil?
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nebrija
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - "@javierhonduco"
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-22 00:00:00.000000000 Z
11
+ date: 2016-02-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri