RubyGems - nebrija - Versions diffs - 0.2.2 → 1.0.0 - Mend

nebrija 0.2.2 → 1.0.0

This diff compares the contents of two publicly available versions of a package, each released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (4) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 08639cd57c6f906d92468d3366cf2302cdbf2332
-  data.tar.gz: 1f288ad92cc4c1b451cfd5ec28737fa435420801
+  metadata.gz: 4e22c43f3c2a0f893f6e66a2f64337d3c813377a
+  data.tar.gz: 08bb77b1e7d90bf3a9201ec3b76b8c8a2275ec75
 SHA512:
-  metadata.gz: 6754e729f784cac1e9d3fe37e22e5f0a81cad4414116ce37a50ba7cc29079609932e2f5ed06051cb1eb48b3e49740284d8e92aa33a7b607c87d3bbe6ae8c3537
-  data.tar.gz: cd496967b0e218f27acc7c933d33350a5c0fa64793ab52e43f414b0149f2e6c3ee13e74ce6a13f78adc8ce9347d5334707f251453495e04d3f085e6e3bf68937
+  metadata.gz: bf5634e8b8e06f50578d525301947f9c728358ace2bed1a46c80e3582563c2f0670004dd8c275a53a3616abf06ef9e29fd4d8f9d4bc49db98591ae917bee56ca
+  data.tar.gz: 7992fbe7cbfee315992699ca37b6a830215fc50e754ff8def65c7fb025cbc511c09da0fc7ff0f8bfdd42cb54eb1f9afd2bcad706f48bc2f71c05be2477fd74a5

data/bin/nebrija CHANGED Viewed

@@ -3,4 +3,5 @@
 require 'json'
 require_relative '../lib/nebrija/cli'
-Nebrija::cli(ARGV.first)
+raise NotImplementedError
+#Nebrija::cli(ARGV.first)

data/lib/nebrija/parser.rb CHANGED Viewed

@@ -14,8 +14,8 @@ class Parser
     if valid?
       {
         :status => 'success',
-        :type => single? ? 'single' : 'multiple',
-        :response => single? ? parse_single : parse_multiple
+        :type => 'single',
+        :response => parse_single
       }
     else
       {
@@ -28,46 +28,69 @@ class Parser
   private
   def single?
-    @doc.css('body > ul').length.zero?
+    @doc.css('article').length == 1
   end
   def parse_single
-    single_data = []
-    state = :entry # TODO. Improve FSM syntax.
-    index = -1 # HACK(javierhonduco)
-    @doc.css('body > div > p').each do |entry|
-      if entry['class'] == 'p' and state == :entry
-        word = entry.css('span').inner_text
-        word = '=>' if word == ''
-        single_data << {
-          :word => word.strip.capitalize,
-          :meanings => [],
-          :etymology  => nil
-        }
-        index+=1
-      else
-        text = entry.inner_text.strip.gsub(/[0-9]+\.[ ]/, '')
-        if text[0] == '('
-          single_data[index][:etymology] = text
-          next
-        end
+    response = {
+      :basic_meanings => [],
+      :other_meanings => []
+    }
+    response[:word] = @doc.css('header').inner_text.sub('.', '')
+    @doc.css('body > div > article > p').each_with_index do |entry, index|
+      if index.zero? # Parsing etymology
+        response[:etymology] = entry.inner_text
+      elsif entry['class'] =~ /j[0-9]*/
+        # Parsing first meaning
+        response[:basic_meanings] << metadata(entry.inner_text)
+      elsif entry['class'] == 'm' || entry['class'] =~/k[0-9]*/
+        # Parsing other meanings
+        #   k is the expression with 1 element
+        #   m is the meaning with >= elements
+        type = (:meaning if entry['class'] == 'm') || :expression
+        response[:other_meanings] << [type, entry.inner_text]
+      end
+    end
-        unparsed_meta = text.scan META_REGEX
+    clean! response
+  end
+  def clean! response
+    parsed_meanings = []
+    state = :EXPR
+    temp = nil
-        text = text.gsub(META_REGEX, '')
-        single_data[index][:meanings] << {
-          :meaning    => text,
-          :meta       => (unparsed_meta.join.strip if unparsed_meta.join.strip != ''),
-        } if !text.nil? and text != '' and index >= 0
-        state = :definitions
+    response[:other_meanings].each do |type, text|
+      state = :EXPR if type == :expression
+      if state == :EXPR
+        unless temp.nil?
+          parsed_meanings << temp
+        end
+        temp = {
+          :expression => text,
+          :meanings => []
+        }
+        state = :MEAN
+      elsif state == :MEAN
+        temp[:meanings] << metadata(text)
       end
-      state = :entry
     end
+    response[:other_meanings] = parsed_meanings
-    single_data
+    response
   end
+  def metadata text
+    # To be implemented
+    # The idea would be to split the text in metadata
+    # and real text. It's seems quite tricky.
+    {
+      :meaning => text,
+      :meta => nil
+    }
+  end
   def parse_multiple
     @doc.css('body > ul > li > a').map do |word|
       {
@@ -78,13 +101,11 @@ class Parser
   end
   def valid?
-    valid_title = (@doc.css('title').inner_text =~/error/).nil?
-    valid_body  = (@doc.css('body').inner_text =~/No encontrado/).nil?
-    valid_title && valid_body && delete_pending?
+    !@doc.css('article').length.zero? # delete_pending?
   end
   def delete_pending?
+    # TODO: Check
     tb_deleted = true
     if !@doc.css('body > div > p').nil? && !@doc.css('body > div > p').first.nil?
       tb_deleted = (@doc.css('body > div > p').first.inner_text =~/suprimido/).nil?

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: nebrija
 version: !ruby/object:Gem::Version
-  version: 0.2.2
+  version: 1.0.0
 platform: ruby
 authors:
 - "@javierhonduco"
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-11-22 00:00:00.000000000 Z
+date: 2016-02-24 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri