RubyGems - greeb - Versions diffs - 0.0.1 → 0.0.2 - Mend

greeb 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

data/Gemfile.lock ADDED

@@ -0,0 +1,24 @@
+PATH
+  remote: .
+  specs:
+    greeb (0.0.2)
+      rspec (~> 2.4.0)
+GEM
+  remote: http://rubygems.org/
+  specs:
+    diff-lcs (1.1.2)
+    rspec (2.4.0)
+      rspec-core (~> 2.4.0)
+      rspec-expectations (~> 2.4.0)
+      rspec-mocks (~> 2.4.0)
+    rspec-core (2.4.0)
+    rspec-expectations (2.4.0)
+      diff-lcs (~> 1.1.2)
+    rspec-mocks (2.4.0)
+PLATFORMS
+  ruby
+DEPENDENCIES
+  greeb!

data/Rakefile CHANGED

@@ -2,3 +2,11 @@
 require 'bundler'
 Bundler::GemHelper.install_tasks
+require 'rspec/core/rake_task'
+desc 'Run all examples'
+RSpec::Core::RakeTask.new(:spec) do |t|
+  t.rspec_opts = %w[--color]
+end
+task :default => :spec

data/greeb.gemspec CHANGED

@@ -1,7 +1,7 @@
 # encoding: utf-8
 $:.push File.expand_path('../lib', __FILE__)
-require 'greeb/version'
+require 'greeb'
 Gem::Specification.new do |s|
   s.name        = 'greeb'
@@ -11,11 +11,13 @@ Gem::Specification.new do |s|
   s.email       = [ 'dmitry@eveel.ru' ]
   s.homepage    = 'https://github.com/eveel/greeb'
   s.summary     = 'Greeb is a Graphematical Analyzer.'
-  s.description = 'Greeb is a Graphematical Analyzer, ' \
+  s.description = 'Greeb is awesome Graphematical Analyzer, ' \
                   'written in Ruby.'
   s.rubyforge_project = 'greeb'
+  s.add_dependency 'rspec', '~> 2.4.0'
   s.files         = `git ls-files`.split("\n")
   s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
   s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }

data/lib/enumerable.rb CHANGED

@@ -1,7 +1,9 @@
 # encoding: utf-8
+# Enumerable module additions.
+#
 module Enumerable
-  def collect_with_index(i = -1)
+  def collect_with_index(i = -1) # :nodoc:
     collect { |e| yield(e, i += 1) }
   end
   alias map_with_index collect_with_index

data/lib/greeb.rb CHANGED

@@ -1,144 +1,11 @@
 # encoding: utf-8
-require 'meta_array'
-require 'enumerable'
+# Greeb is awesome Graphematical Analyzer.
+#
 module Greeb
-  RU_LEX = /^[А-Яа-я]+$/u
-  EN_LEX = /^[A-Za-z]+$/u
-  EOL = /^\n+$/u
-  SEP = /^[*=_\/\\ ]$/u
-  PUN = /^(\.|\!|\?)$/u
-  SPUN = /^(\,|\[|\]|\(|\)|\-|:|;)$/u
-  DIG = /^[0-9]+$/u
-  DIL = /^[А-Яа-яA-Za-z0-9]+$/u
-  EMPTY = ''
-  class Parser
-    attr_accessor :origin
-    private :origin=
-    attr_writer :tree
-    private :tree=
-    def initialize(origin)
-      self.origin = origin
-    end
-    def tree
-      @tree ||= parse(origin)
-    end
-    private
-      def parse(origin) # :nodoc:
-        tree = MetaArray.new
-        # paragraph
-        p_id = 0
-        # sentence
-        s_id = 0
-        # subsentence
-        ss_id = 0
-        token = ''
+  # Version of the Greeb.
+  #
+  VERSION = "0.0.2"
-        origin.each_char do |c|
-          puts "[#{token.inspect}] ← #{c.inspect}"
-          case c
-            when EOL then begin
-              case token
-                when EMPTY then token << c
-                when EOL then begin
-                  token = ''
-                  p_id += 1
-                  s_id = 0
-                  ss_id = 0
-                end
-              else
-                tree[p_id][s_id][ss_id] << token
-                token = c
-              end
-            end
-            when SEP then begin
-              case token
-                when EMPTY
-              else
-                tree[p_id][s_id][ss_id] << token
-                while tree[p_id][s_id][ss_id].last == c
-                  tree[p_id][s_id][ss_id].pop
-                end
-                tree[p_id][s_id][ss_id] << c
-                token = ''
-              end
-            end
-            when PUN then begin
-              case token
-                when EMPTY
-              else
-                tree[p_id][s_id][ss_id] << token
-                tree[p_id][s_id][ss_id] << c
-                token = ''
-                s_id += 1
-                ss_id = 0
-              end
-            end
-            when SPUN then begin
-              case token
-                when EMPTY
-              else
-                tree[p_id][s_id][ss_id] << token
-                tree[p_id][s_id][ss_id] << c
-                token = ''
-                ss_id += 1
-              end
-            end
-            when RU_LEX then begin
-              case token
-                when EOL then begin
-                  tree[p_id][s_id][ss_id] << ' '
-                  token = c
-                end
-              else
-                token << c
-              end
-            end
-            when EN_LEX then begin
-              case token
-                when EOL then begin
-                  tree[p_id][s_id][ss_id] << ' '
-                  token = c
-                end
-              else
-                token << c
-              end
-            end
-            when DIG then begin
-              case token
-                when EOL then begin
-                  tree[p_id][s_id][ss_id] << ' '
-                  token = c
-                end
-              else
-                token << c
-              end
-            end
-            when DIL then begin
-              case token
-                when EOL then begin
-                  tree[p_id][s_id][ss_id] << token
-                  token = c
-                end
-              else
-                token << c
-              end
-            end
-          end
-        end
-        tree[p_id][s_id][ss_id] << token
-        tree.delete(nil)
-        tree.to_a
-      end
-  end
+  require 'greeb/parser'
 end

data/lib/greeb/parser.rb ADDED

@@ -0,0 +1,176 @@
+# encoding: utf-8
+require 'meta_array'
+require 'enumerable'
+# Graphematical Parser of the Greeb.
+# Use it with love.
+#
+class Greeb::Parser
+  # Russian lexeme (i.e.: "хуй").
+  #
+  RUSSIAN_LEXEME = /^[А-Яа-яЁё]+$/u
+  # English lexeme (i.e.: "foo").
+  #
+  ENGLISH_LEXEME = /^[A-Za-z]+$/u
+  # End of Line sequence (i.e.: "\n").
+  #
+  END_OF_LINE = /^\n+$/u
+  # In-subsentence separator (i.e.: "*" or "\").
+  #
+  SEPARATOR = /^[*=_\/\\ ]$/u
+  # Punctuation character (i.e.: "." or "!").
+  #
+  PUNCTUATION = /^(\.|\!|\?)$/u
+  # In-sentence punctuation character (i.e.: "," or "-").
+  #
+  SENTENCE_PUNCTUATION = /^(\,|\[|\]|\(|\)|\-|:|;)$/u
+  # Digit (i.e.: "1337").
+  #
+  DIGIT = /^[0-9]+$/u
+  # Digit-Letter complex (i.e.: "0xDEADBEEF").
+  #
+  DIGIT_LETTER = /^[А-Яа-яA-Za-z0-9Ёё]+$/u
+  # Empty string (i.e.: "").
+  #
+  EMPTY = ''
+  attr_accessor :text
+  private :text=
+  # Create a new instance of Greeb::Parser.
+  #
+  # ==== Parameters
+  # text<String>:: Source text.
+  #
+  def initialize(text)
+    self.text = text
+  end
+  # Perform the text parsing.
+  #
+  # ==== Returns
+  # Array:: Tree of Graphematical Analysis of text.
+  #
+  def parse
+    return @tree if @tree
+    # parse tree
+    tree = MetaArray.new
+    # paragraph, sentence, subsentence
+    p_id, s_id, ss_id = 0, 0, 0
+    # current token
+    token = ''
+    # run FSM
+    text.each_char do |c|
+      case c
+        when END_OF_LINE then begin
+          case token
+            when EMPTY then token << c
+            when END_OF_LINE then begin
+              token = ''
+              p_id += 1
+              s_id = 0
+              ss_id = 0
+            end
+          else
+            tree[p_id][s_id][ss_id] << token
+            token = c
+          end
+        end
+        when SEPARATOR then begin
+          case token
+            when EMPTY
+          else
+            tree[p_id][s_id][ss_id] << token
+            while tree[p_id][s_id][ss_id].last == c
+              tree[p_id][s_id][ss_id].pop
+            end
+            tree[p_id][s_id][ss_id] << c
+            token = ''
+          end
+        end
+        when PUNCTUATION then begin
+          case token
+            when EMPTY
+          else
+            tree[p_id][s_id][ss_id] << token
+            tree[p_id][s_id][ss_id] << c
+            token = ''
+            s_id += 1
+            ss_id = 0
+          end
+        end
+        when SENTENCE_PUNCTUATION then begin
+          case token
+            when EMPTY
+          else
+            tree[p_id][s_id][ss_id] << token
+            tree[p_id][s_id][ss_id] << c
+            token = ''
+            ss_id += 1
+          end
+        end
+        when RUSSIAN_LEXEME then begin
+          case token
+            when END_OF_LINE then begin
+              tree[p_id][s_id][ss_id] << ' '
+              token = c
+            end
+          else
+            token << c
+          end
+        end
+        when ENGLISH_LEXEME then begin
+          case token
+            when END_OF_LINE then begin
+              tree[p_id][s_id][ss_id] << ' '
+              token = c
+            end
+          else
+            token << c
+          end
+        end
+        when DIGIT then begin
+          case token
+            when END_OF_LINE then begin
+              tree[p_id][s_id][ss_id] << ' '
+              token = c
+            end
+          else
+            token << c
+          end
+        end
+        when DIGIT_LETTER then begin
+          case token
+            when END_OF_LINE then begin
+              tree[p_id][s_id][ss_id] << token
+              token = c
+            end
+          else
+            token << c
+          end
+        end
+      end
+    end
+    unless token.empty?
+      tree[p_id][s_id][ss_id] << token
+    end
+    tree.delete(nil)
+    @tree = tree.to_a
+  end
+end

data/lib/meta_array.rb CHANGED

@@ -1,5 +1,8 @@
 # encoding: utf-8
+# MetaArray is an Array, which creates subarrays
+# on non-existent elements.
+#
 class MetaArray < Array
   def [] id
     super(id) or begin

data/spec/parser_spec.rb ADDED

@@ -0,0 +1,63 @@
+# encoding: utf-8
+require File.expand_path('../spec_helper.rb', __FILE__)
+describe Greeb::Parser do
+  it 'should parse very simple strings' do
+    'буба сука дебил'.should be_parsed_as([
+      [
+        [ [ 'буба', ' ', 'сука', ' ', 'дебил' ] ]
+      ]
+    ])
+  end
+  it 'should parse one sentence with subsentences' do
+    'буба, сука, дебил'.should be_parsed_as([
+      [
+        [
+          [ 'буба', ',' ],
+          [ 'сука', ',' ],
+          [ 'дебил' ]
+        ]
+      ]
+    ])
+  end
+  it 'should parse two simple paragraphs' do
+    "буба сука дебил\n\nточно!".should be_parsed_as([
+      [
+        [ [ 'буба', ' ', 'сука', ' ', 'дебил' ] ]
+      ],
+      [
+        [ [ 'точно', '!' ] ]
+      ]
+    ])
+  end
+  it 'should parse two sentences in paragraph' do
+    "буба молодец? буба умница.".should be_parsed_as([
+      [
+        [ [ 'буба', ' ', 'молодец', '?' ] ],
+        [ [ 'буба', ' ', 'умница', '.' ] ]
+      ]
+    ])
+  end
+  it 'should parse sentences with floating point values' do
+    'буба не считает Пи равной 3.14'.should be_parsed_as([
+      [
+        [ [ 'буба', ' ', 'не', ' ', 'считает', ' ',
+            'Пи', ' ', 'равной', ' ', '3.14' ] ]
+      ]
+    ])
+  end
+  it 'should parse sentences with floating "dot" values' do
+    'буба не считает Пи равной 3,14'.should be_parsed_as([
+      [
+        [ [ 'буба', ' ', 'не', ' ', 'считает', ' ',
+            'Пи', ' ', 'равной', ' ', '3,14' ] ]
+      ]
+    ])
+  end
+end

data/spec/spec_helper.rb ADDED

@@ -0,0 +1,14 @@
+# encoding: utf-8
+require File.expand_path('../../lib/greeb', __FILE__)
+RSpec.configure do |c|
+  c.mock_with :rspec
+end
+RSpec::Matchers.define :be_parsed_as do |expected|
+  match do |actual|
+    tree = Greeb::Parser.new(actual).parse
+    tree == expected
+  end
+end

metadata CHANGED

@@ -1,18 +1,29 @@
 --- !ruby/object:Gem::Specification
 name: greeb
 version: !ruby/object:Gem::Version
-  version: 0.0.1
-  prerelease: !!null
+  version: 0.0.2
+  prerelease:
 platform: ruby
 authors:
 - Dmitry A. Ustalov
-autorequire: !!null
+autorequire:
 bindir: bin
 cert_chain: []
-date: 2011-02-06 00:00:00.000000000 +05:00
-default_executable: !!null
-dependencies: []
-description: Greeb is a Graphematical Analyzer, written in Ruby.
+date: 2011-02-20 00:00:00.000000000 +05:00
+default_executable:
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: rspec
+  requirement: &81165430 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: 2.4.0
+  type: :runtime
+  prerelease: false
+  version_requirements: *81165430
+description: Greeb is awesome Graphematical Analyzer, written in Ruby.
 email:
 - dmitry@eveel.ru
 executables: []
@@ -21,18 +32,21 @@ extra_rdoc_files: []
 files:
 - .gitignore
 - Gemfile
+- Gemfile.lock
 - README
 - Rakefile
 - greeb-test.rb
 - greeb.gemspec
 - lib/enumerable.rb
 - lib/greeb.rb
-- lib/greeb/version.rb
+- lib/greeb/parser.rb
 - lib/meta_array.rb
+- spec/parser_spec.rb
+- spec/spec_helper.rb
 has_rdoc: true
 homepage: https://github.com/eveel/greeb
 licenses: []
-post_install_message: !!null
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -50,8 +64,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project: greeb
-rubygems_version: 1.5.0
-signing_key: !!null
+rubygems_version: 1.5.2
+signing_key:
 specification_version: 3
 summary: Greeb is a Graphematical Analyzer.
-test_files: []
+test_files:
+- spec/parser_spec.rb
+- spec/spec_helper.rb

data/lib/greeb/version.rb DELETED

@@ -1,5 +0,0 @@
-# encoding: utf-8
-module Greeb
-  VERSION = "0.0.1"
-end