RubyGems - vn_tagger - Versions diffs - 0.1.0 → 0.2.0 - Mend

vn_tagger 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

checksums.yaml +4 -4
data/README.md +4 -5
data/lib/vn_tagger/document.rb +29 -0
data/lib/vn_tagger/tagger.rb +20 -14
data/lib/vn_tagger/version.rb +1 -1
data/lib/vn_tagger/word.rb +35 -0
data/spec/vn_tagger/document_spec.rb +20 -0
data/spec/vn_tagger/tagger_spec.rb +6 -5
data/spec/vn_tagger/word_spec.rb +18 -0
metadata +8 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: eb1eff57d9490c011b4d340c0d167a20245b8c5c
-  data.tar.gz: b411874dc1e8f7b574cc8995224f6f091bbb5206
+  metadata.gz: 637f23fe16d647e9041131360de3ffd39319c504
+  data.tar.gz: ca68ec84d04a2b0689d4f3dff85a9dd8a0a8f874
 SHA512:
-  metadata.gz: ddce5c31d971c282ba08599a5b49cd38d46ec5e0c178b5b455b47324c941470ef8b4f15fa0f5c8162460105fcaa39344809100a6e2b6f2a856e5df7e6412716b
-  data.tar.gz: 7c96f95660818f66a11607d5056546e188e18cdb7a504babe57bb27eb3fcd4102e244e9768982a442b624e3d4a1929f223c35970dd6349d843a4a2f62f67dc3d
+  metadata.gz: 2430d567ed7dc55a0cea23ffbd0781dfd147afb7576a3593bc71cbef49779c74b737c5d73f88e67101294a42ad5f629f34f9c8a0974e1d34ce7399faded2a21d
+  data.tar.gz: 2caaec9e11213317445b23651cfc64272d35208f6bd256a62a053b01f24750536564b043a9e695672fcf97c2825944e49f3ec83a5b7ddd385e9c5a3c65e30658

data/README.md CHANGED Viewed

@@ -20,11 +20,10 @@ Or install it yourself as:
 text = 'Mourinho là huấn luyện viên của Chelsea'
 document = VnTagger::Tagger.tag(text)
-document.to_xml # => "<?xml version=\"1.0\"?>\n<doc>\n\t<s>\n\t\t<w pos=\"Np\">Mourinho</w>\n\t\t<w pos=\"V\">l&#xE0;</w>\n\t\t<w pos=\"N\">hu&#x1EA5;n luy&#x1EC7;n vi&#xEA;n</w>\n\t\t<w pos=\"E\">c&#x1EE7;a</w>\n\t\t<w pos=\"Np\">Chelsea</w>\n\t\t<w pos=\".\">.</w>\n\t</s>\n</doc>\n"
-document.class # => Nokogiri::Xml::Document
-keys = document.xpath("//w")
-keys.first.attr('pos') # => 'Np'
-keys.first.child.text #=> 'Mourinho'
+document.words.map { |word|
+  [word.text, word.position]
+} # => [["Mourinho", "Np"], ["là", "V"], ["huấn luyện viên", "N"], ["của", "E"],
+["Chelsea", "Np"]]
 ```
 ## Contributing

data/lib/vn_tagger/document.rb ADDED Viewed

@@ -0,0 +1,29 @@
+require 'word'
+module VnTagger
+  class Document
+    attr_reader :xml_document
+    def initialize(xml_document)
+      @xml_document = xml_document
+    end
+    def words
+      @words ||= uncached_words
+    end
+    def uncached_words
+      xml_document.xpath('//w').map do |element|
+        Word.new(element.attr('pos'), element.child.text)
+      end
+    end
+    def filter_by_tag(tag = 'Proper noun')
+      words.select { |word| word.is_tag?(tag) }
+    end
+    def proper_noun_words
+      filter_by_tag('Proper noun').map(&:text)
+    end
+  end
+end

data/lib/vn_tagger/tagger.rb CHANGED Viewed

@@ -1,4 +1,5 @@
 require 'nokogiri'
+require 'document'
 module VnTagger
   class Tagger
@@ -13,16 +14,28 @@ module VnTagger
     def tag
       write_to_file
-      wasGood = system("cd #{ROOT_PATH}; #{COMMAND} -i #{INPUT} -o #{OUTPUT}")
-      if wasGood
-        result_from_output
-      else
-        nil
-      end
+      @success = system("cd #{ROOT_PATH}; #{COMMAND} -i #{INPUT} -o #{OUTPUT}")
+    end
+    def xml_result
+      @xml_result ||= if @success
+                        file = File.open(OUTPUT)
+                        xml_document = Nokogiri::XML(file)
+                        file.close
+                        xml_document
+                      else
+                        Nokogiri::XML('')
+                      end
+    end
+    def result
+      @result ||= Document.new(xml_result)
     end
     def self.tag(text)
-      new(text).tag
+      tagger = new(text)
+      tagger.tag
+      tagger.result
     end
     private
@@ -33,13 +46,6 @@ module VnTagger
       file.close
     end
-    def result_from_output
-      file = File.open(OUTPUT)
-      doc = Nokogiri::XML(file)
-      file.close
-      doc
-    end
     def normalize(string)
       string.to_s.gsub(/(\"|\')/, '')
     end

data/lib/vn_tagger/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module VnTagger
-  VERSION = '0.1.0'
+  VERSION = '0.2.0'
 end

data/lib/vn_tagger/word.rb ADDED Viewed

@@ -0,0 +1,35 @@
+module VnTagger
+  class Word
+    attr_reader :position, :text
+    WORD_POSITIONS = {
+      'Np' => 'Proper noun',
+      'Nc' => 'Classifier',
+      'Nu' => 'Unit noun',
+      'N' => 'Common noun',
+      'V' => 'Verb',
+      'A' => 'Adjective',
+      'P' => 'Pronoun',
+      'R' => 'Adverb',
+      'L' => 'Determiner',
+      'M' => 'Numeral',
+      'E' => 'Preposition',
+      'C' => 'Subordinating conjunction',
+      'CC' => 'Coordinating conjunction',
+      'I' => 'Interjection',
+      'T' => 'Auxiliary, modal words',
+      'Y' => 'Abbreviation',
+      'Z' => 'Bound morphemes',
+      'X' => 'Unknown'
+    }
+    def initialize(position, text)
+      @position = position
+      @text = text
+    end
+    def is_tag?(tag = 'Proper noun')
+      WORD_POSITIONS[position] == tag
+    end
+  end
+end

data/spec/vn_tagger/document_spec.rb ADDED Viewed

@@ -0,0 +1,20 @@
+require 'spec_helper'
+describe VnTagger::Document do
+  let(:xml_text) { '<doc><s><w pos="Np">HLV</w></s></doc>' }
+  let(:xml_document) { Nokogiri::XML(xml_text) }
+  let(:document) { described_class.new(xml_document) }
+  describe '#words' do
+    it 'returns data extract from xml document' do
+      word = document.words.first
+      expect(word.text).to eq 'HLV'
+      expect(word.position).to eq 'Np'
+    end
+  end
+  describe '#proper_noun_words' do
+    subject { document.proper_noun_words }
+    it { is_expected.to eq ['HLV'] }
+  end
+end

data/spec/vn_tagger/tagger_spec.rb CHANGED Viewed

@@ -4,13 +4,14 @@ describe VnTagger::Tagger do
   describe '#tag' do
     let(:text) { 'HLV cùa Chelsea không hối tiếc vì hành động bỏ về sớm trong trận gặp Aston Villa.' }
     let(:tagger) { described_class.new(text) }
-    let(:result) { tagger.tag }
-    let(:key) { result.xpath("//w").first }
+    let(:result) { tagger.result }
+    let(:word) { result.words.first }
     it 'returns xml tagged text' do
-      expect(result).to be_a(Nokogiri::XML::Document)
-      expect(key.attr('pos')).to eq 'Np'
-      expect(key.child.text).to eq 'HLV'
+      tagger.tag
+      expect(result).to be_a(VnTagger::Document)
+      expect(word.position).to eq 'Np'
+      expect(word.text).to eq 'HLV'
     end
   end
 end

data/spec/vn_tagger/word_spec.rb ADDED Viewed

@@ -0,0 +1,18 @@
+require 'spec_helper'
+describe VnTagger::Word do
+  describe '#is_tag?' do
+    let(:word) { described_class.new('N', 'text') }
+    subject { word.is_tag?(tag) }
+    context 'when tag is the same with word position' do
+      let(:tag) { 'Common noun' }
+      it { is_expected.to eq true }
+    end
+    context 'when tag is not the same with word position' do
+      let(:tag) { 'Verb' }
+      it { is_expected.to eq false }
+    end
+  end
+end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: vn_tagger
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.2.0
 platform: ruby
 authors:
 - Hieu Nguyen
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-12-22 00:00:00.000000000 Z
+date: 2014-12-30 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -86,8 +86,10 @@ files:
 - lib/vn.hus.nlp.tokenizer-4.1.1.jar
 - lib/vn.hus.nlp.utils-1.0.0.jar
 - lib/vn_tagger.rb
+- lib/vn_tagger/document.rb
 - lib/vn_tagger/tagger.rb
 - lib/vn_tagger/version.rb
+- lib/vn_tagger/word.rb
 - resources/automata/dfaLexicon.xml
 - resources/automata/externalLexicon.xml
 - resources/bigram/bigram.xml
@@ -98,7 +100,9 @@ files:
 - resources/normalization/rules.txt
 - resources/prefix/namedEntityPrefix.xml
 - spec/spec_helper.rb
+- spec/vn_tagger/document_spec.rb
 - spec/vn_tagger/tagger_spec.rb
+- spec/vn_tagger/word_spec.rb
 - spec/vn_tagger_spec.rb
 - vn.hus.nlp.tagger-4.2.0.jar
 - vnTagger.sh
@@ -130,5 +134,7 @@ specification_version: 4
 summary: This is a wrapper for vn_tagger library, a A POS tagger for Vietnamese texts.
 test_files:
 - spec/spec_helper.rb
+- spec/vn_tagger/document_spec.rb
 - spec/vn_tagger/tagger_spec.rb
+- spec/vn_tagger/word_spec.rb
 - spec/vn_tagger_spec.rb