ruby-spacy 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Gemfile.lock +2 -1
- data/README.md +7 -7
- data/examples/get_started/lexeme.rb +2 -2
- data/examples/get_started/linguistic_annotations.rb +1 -1
- data/examples/get_started/morphology.rb +1 -1
- data/examples/get_started/named_entities.rb +1 -1
- data/examples/get_started/pos_tags_and_dependencies.rb +1 -1
- data/examples/japanese/ancestors.rb +9 -11
- data/examples/japanese/entity_annotations_and_labels.rb +1 -1
- data/examples/japanese/lemmatization.rb +1 -1
- data/examples/japanese/named_entity_recognition.rb +1 -1
- data/examples/japanese/navigating_parse_tree.rb +18 -18
- data/examples/japanese/noun_chunks.rb +1 -1
- data/examples/japanese/pos_tagging.rb +1 -1
- data/examples/linguistic_features/ancestors.rb +13 -10
- data/examples/linguistic_features/entity_annotations_and_labels.rb +1 -1
- data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +2 -2
- data/examples/linguistic_features/information_extraction.rb +2 -2
- data/examples/linguistic_features/iterating_children.rb +2 -2
- data/examples/linguistic_features/iterating_lefts_and_rights.rb +4 -4
- data/examples/linguistic_features/lemmatization.rb +1 -1
- data/examples/linguistic_features/named_entity_recognition.rb +1 -1
- data/examples/linguistic_features/navigating_parse_tree.rb +12 -12
- data/examples/linguistic_features/noun_chunks.rb +1 -1
- data/examples/linguistic_features/pos_tagging.rb +1 -1
- data/examples/linguistic_features/retokenize_1.rb +1 -1
- data/examples/linguistic_features/retokenize_2.rb +2 -2
- data/examples/linguistic_features/rule_based_morphology.rb +1 -1
- data/examples/linguistic_features/similarity_between_lexemes.rb +18 -0
- data/examples/rule_based_matching/creating_spans_from_matches.rb +1 -1
- data/lib/ruby-spacy.rb +181 -22
- data/lib/ruby-spacy/version.rb +1 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: bd5a1c905e5aed7553ac5b1927a6b9cdecaf887c505ea3e38f806e886adeb60c
+  data.tar.gz: 6d3f3fd22e9d927d430d2b9e48dcd018da6eb601813192e6ea14e094cf51e331
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b5419fb75109b837465c64da1ace956b91d0a0ab589cdb71ace9a308ce1af263edc0e2f206a80ab71a3ab17e86e6520ab432b657c5f60548c696a36049773c60
+  data.tar.gz: 385606212f290b701458bd1a555e553417ed20be2d1e2008107396a9adc224590c76317c52d30d7c97435c0650ef8c1a15a43fe4b92c797188944a302da51612
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    ruby-spacy (0.1.3)
+    ruby-spacy (0.1.4)
     numpy (~> 0.4.0)
     pycall (~> 1.4.0)
     terminal-table (~> 3.0.1)
@@ -24,6 +24,7 @@ GEM
 PLATFORMS
   arm64-darwin-20
   x86_64-darwin-20
+  x86_64-linux
 
 DEPENDENCIES
   github-markup
data/README.md
CHANGED
@@ -128,7 +128,7 @@ headings = ["text", "lemma", "pos", "tag", "dep"]
 rows = []
 
 doc.each do |token|
-  rows << [token.text, token.
+  rows << [token.text, token.lemma, token.pos, token.tag, token.dep]
 end
 
 table = Terminal::Table.new rows: rows, headings: headings
@@ -166,7 +166,7 @@ headings = ["text", "lemma", "pos", "tag", "dep"]
 rows = []
 
 doc.each do |token|
-  rows << [token.text, token.
+  rows << [token.text, token.lemma, token.pos, token.tag, token.dep]
 end
 
 table = Terminal::Table.new rows: rows, headings: headings
@@ -212,7 +212,7 @@ doc.each do |token|
   morph = token.morphology.map do |k, v|
     "#{k} = #{v}"
   end.join("\n")
-  rows << [token.text, token.
+  rows << [token.text, token.shape, token.is_alpha, token.is_stop, morph]
 end
 
 table = Terminal::Table.new rows: rows, headings: headings
@@ -300,7 +300,7 @@ doc =nlp.read("Apple is looking at buying U.K. startup for $1 billion")
 rows = []
 
 doc.ents.each do |ent|
-  rows << [ent.text, ent.start_char, ent.end_char, ent.
+  rows << [ent.text, ent.start_char, ent.end_char, ent.label]
 end
 
 headings = ["text", "start_char", "end_char", "label"]
@@ -332,7 +332,7 @@ doc = nlp.read(sentence)
 rows = []
 
 doc.ents.each do |ent|
-  rows << [ent.text, ent.start_char, ent.end_char, ent.
+  rows << [ent.text, ent.start_char, ent.end_char, ent.label]
 end
 
 headings = ["text", "start", "end", "label"]
@@ -393,8 +393,8 @@ nlp = Spacy::Language.new("en_core_web_lg")
 doc1 = nlp.read("I like salty fries and hamburgers.")
 doc2 = nlp.read("Fast food tastes very good.")
 
-puts "Doc 1: " + doc1
-puts "Doc 2: " + doc2
+puts "Doc 1: " + doc1.text
+puts "Doc 2: " + doc2.text
 puts "Similarity: #{doc1.similarity(doc2)}"
 
 ```
data/examples/get_started/lexeme.rb
CHANGED
@@ -8,8 +8,8 @@ headings = ["text", "shape", "prefix", "suffix", "is_alpha", "is_digit"]
 rows = []
 
 doc.each do |word|
-  lexeme =
-  rows << [lexeme.text, lexeme.
+  lexeme = nlp.vocab(word.text)
+  rows << [lexeme.text, lexeme.shape, lexeme.prefix, lexeme.suffix, lexeme.is_alpha, lexeme.is_digit]
 end
 
 table = Terminal::Table.new rows: rows, headings: headings
data/examples/get_started/morphology.rb
CHANGED
@@ -12,7 +12,7 @@ doc.each do |token|
     "#{k} = #{v}"
   end.join("\n")
   # end.join("<br />")
-  rows << [token.text, token.
+  rows << [token.text, token.shape, token.is_alpha, token.is_stop, morph]
 end
 
 table = Terminal::Table.new rows: rows, headings: headings
data/examples/get_started/named_entities.rb
CHANGED
@@ -8,7 +8,7 @@ headings = ["text", "start_char", "end_char", "label"]
 rows = []
 
 doc.ents.each do |ent|
-  rows << [ent.text, ent.start_char, ent.end_char, ent.
+  rows << [ent.text, ent.start_char, ent.end_char, ent.label]
 end
 
 table = Terminal::Table.new rows: rows, headings: headings
data/examples/get_started/pos_tags_and_dependencies.rb
CHANGED
@@ -8,7 +8,7 @@ headings = ["text", "lemma", "pos", "tag", "dep"]
 rows = []
 
 doc.each do |token|
-  rows << [token.text, token.
+  rows << [token.text, token.lemma, token.pos, token.tag, token.dep]
 end
 
 table = Terminal::Table.new rows: rows, headings: headings
data/examples/japanese/ancestors.rb
CHANGED
@@ -23,9 +23,7 @@ puts "The root of the sentence is: " + root.text
 puts "The subject of the sentence is: " + subject.text
 
 subject.subtree.each do |descendant|
-
-  ancestors = Spacy::generator_to_array(descendant.ancestors)
-  rows << [descendant.text, descendant.dep_, descendant.n_lefts, descendant.n_rights, ancestors]
+  rows << [descendant.text, descendant.dep, descendant.n_lefts, descendant.n_rights, descendant.ancestors.map(&:text).join(", ")]
 end
 
 table = Terminal::Table.new rows: rows, headings: headings
@@ -34,11 +32,11 @@ puts table
 # The sentence: 私の父は寿司が好きだ。
 # The root of the sentence is: 好き
 # The subject of the sentence is: 父
-#
-# | text | dep | n_lefts | n_rights | ancestors
-#
-# | 私 | nmod | 0 | 1 |
-# | の | case | 0 | 0 |
-# | 父 | dislocated | 1 | 1 |
-# | は | case | 0 | 0 |
-#
+# +------+------------+---------+----------+--------------+
+# | text | dep        | n_lefts | n_rights | ancestors    |
+# +------+------------+---------+----------+--------------+
+# | 私   | nmod       | 0       | 1        | 父, 好き     |
+# | の   | case       | 0       | 0        | 私, 父, 好き |
+# | 父   | dislocated | 1       | 1        | 好き         |
+# | は   | case       | 0       | 0        | 父, 好き     |
+# +------+------------+---------+----------+--------------+
data/examples/japanese/entity_annotations_and_labels.rb
CHANGED
@@ -10,7 +10,7 @@ headings = ["text", "ent_iob", "ent_iob_", "ent_type_"]
 rows = []
 
 doc.each do |ent|
-  rows << [ent.text, ent.ent_iob, ent.ent_iob_, ent.
+  rows << [ent.text, ent.ent_iob, ent.ent_iob_, ent.ent_type]
 end
 
 table = Terminal::Table.new rows: rows, headings: headings
data/examples/japanese/named_entity_recognition.rb
CHANGED
@@ -10,7 +10,7 @@ headings = ["text", "start", "end", "label"]
 rows = []
 
 doc.ents.each do |ent|
-  rows << [ent.text, ent.start_char, ent.end_char, ent.
+  rows << [ent.text, ent.start_char, ent.end_char, ent.label]
 end
 
 table = Terminal::Table.new rows: rows, headings: headings
data/examples/japanese/navigating_parse_tree.rb
CHANGED
@@ -9,26 +9,26 @@ headings = ["text", "dep", "head text", "head pos", "children"]
 rows = []
 
 doc.each do |token|
-  rows << [token.text, token.
+  rows << [token.text, token.dep, token.head.text, token.head.pos, token.children.map(&:text).join(", ")]
 end
 
 table = Terminal::Table.new rows: rows, headings: headings
 puts table
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
++------+----------+-----------+----------+------------------------+
+| text | dep      | head text | head pos | children               |
++------+----------+-----------+----------+------------------------+
+| 自動 | compound | 車        | 92       |                        |
+| 運転 | compound | 車        | 92       |                        |
+| 車   | nsubj    | 転嫁      | 100      | 自動, 運転, は         |
+| は   | case     | 車        | 92       |                        |
+| 保険 | compound | 責任      | 92       |                        |
+| 責任 | obj      | 転嫁      | 100      | 保険, を               |
+| を   | case     | 責任      | 92       |                        |
+| 製造 | compound | 者        | 92       |                        |
+| 者   | obl      | 転嫁      | 100      | 製造, に               |
+| に   | case     | 者        | 92       |                        |
+| 転嫁 | ROOT     | 転嫁      | 100      | 車, 責任, 者, する, 。 |
+| する | aux      | 転嫁      | 100      |                        |
+| 。   | punct    | 転嫁      | 100      |                        |
++------+----------+-----------+----------+------------------------+
data/examples/japanese/noun_chunks.rb
CHANGED
@@ -9,7 +9,7 @@ headings = ["text", "root.text", "root.dep", "root.head.text"]
 rows = []
 
 doc.noun_chunks.each do |chunk|
-  rows << [chunk.text, chunk.root.text, chunk.root.
+  rows << [chunk.text, chunk.root.text, chunk.root.dep, chunk.root.head.text]
 end
 
 table = Terminal::Table.new rows: rows, headings: headings
data/examples/japanese/pos_tagging.rb
CHANGED
@@ -8,7 +8,7 @@ headings = ["text", "lemma", "pos", "tag", "dep"]
 rows = []
 
 doc.each do |token|
-  rows << [token.text, token.
+  rows << [token.text, token.lemma, token.pos, token.tag, token.dep]
 end
 
 table = Terminal::Table.new rows: rows, headings: headings
data/examples/linguistic_features/ancestors.rb
CHANGED
@@ -24,18 +24,21 @@ puts "The subject of the sentence is: " + subject.text
 subject.subtree.each do |descendant|
   # need to convert "ancestors" object from a python generator to a ruby array
   ancestors = Spacy::generator_to_array(descendant.ancestors)
-  rows << [descendant.text, descendant.
+  rows << [descendant.text, descendant.dep, descendant.n_lefts, descendant.n_rights, ancestors.map(&:text).join(", ")]
 end
 
 table = Terminal::Table.new rows: rows, headings: headings
 print table
 
-#
-#
-#
-#
-# |
-#
-# |
-# |
-#
+# The sentence: Credit and mortgage account holders must submit their requests
+# The root of the sentence is: submit
+# The subject of the sentence is: holders
+# +----------+----------+---------+----------+----------------------------------+
+# | text     | dep      | n_lefts | n_rights | ancestors                        |
+# +----------+----------+---------+----------+----------------------------------+
+# | Credit   | nmod     | 0       | 2        | holders, submit                  |
+# | and      | cc       | 0       | 0        | Credit, holders, submit          |
+# | mortgage | compound | 0       | 0        | account, Credit, holders, submit |
+# | account  | conj     | 1       | 0        | Credit, holders, submit          |
+# | holders  | nsubj    | 1       | 0        | submit                           |
+# +----------+----------+---------+----------+----------------------------------+
data/examples/linguistic_features/entity_annotations_and_labels.rb
CHANGED
@@ -10,7 +10,7 @@ headings = ["text", "ent_iob", "ent_iob_", "ent_type_"]
 rows = []
 
 doc.each do |ent|
-  rows << [ent.text, ent.ent_iob, ent.ent_iob_, ent.
+  rows << [ent.text, ent.ent_iob, ent.ent_iob_, ent.ent_type]
 end
 
 table = Terminal::Table.new rows: rows, headings: headings
data/examples/linguistic_features/information_extraction.rb
CHANGED
@@ -19,11 +19,11 @@ texts.each do |text|
   doc.each do |token|
     if token.ent_type_ == "MONEY"
       if ["attr", "dobj"].index token.dep_
-        subj = Spacy.generator_to_array(token.head.lefts).select{|t| t.
+        subj = Spacy.generator_to_array(token.head.lefts).select{|t| t.dep == "nsubj"}
         if !subj.empty?
           puts(subj[0].text + " --> " + token.text)
         end
-      elsif token.dep_ == "pobj" and token.head.
+      elsif token.dep_ == "pobj" and token.head.dep == "prep"
         puts token.head.head.text + " --> " + token.text
       end
     end
data/examples/linguistic_features/finding_a_verb_with_a_subject.rb
CHANGED
@@ -12,7 +12,7 @@ doc.each do |token|
   if token.pos_ == "VERB"
     token.children.each do |child|
       if child.dep_ == "nsubj"
-        results << child.head
+        results << child.head.text
       end
     end
   end
@@ -20,5 +20,5 @@ end
 
 puts results.to_s
 
-# [shift]
+# ["shift"]
 
data/examples/linguistic_features/iterating_lefts_and_rights.rb
CHANGED
@@ -7,14 +7,14 @@ doc = nlp.read("bright red apples on the tree")
 
 puts "Text: " + doc.text
 
-puts "Words to the left of 'apple': " +
-puts "Words to the right of 'apple': " +
+puts "Words to the left of 'apple': " + doc[2].lefts.map(&:text).join(", ")
+puts "Words to the right of 'apple': " + doc[2].rights.map(&:text).join(", ")
 
 puts "Num of the words to the left of 'apple': " + doc[2].n_lefts.to_s
 puts "Num of the words to the right of 'apple': " + doc[2].n_rights.to_s
 
 # Text: bright red apples on the tree
-# Words to the left of 'apple':
-# Words to the right of 'apple':
+# Words to the left of 'apple': bright, red
+# Words to the right of 'apple': on
 # Num of the words to the left of 'apple': 2
 # Num of the words to the right of 'apple': 1
data/examples/linguistic_features/named_entity_recognition.rb
CHANGED
@@ -10,7 +10,7 @@ headings = ["text", "start", "end", "label"]
 rows = []
 
 doc.ents.each do |ent|
-  rows << [ent.text, ent.start_char, ent.end_char, ent.
+  rows << [ent.text, ent.start_char, ent.end_char, ent.label]
 end
 
 table = Terminal::Table.new rows: rows, headings: headings
data/examples/linguistic_features/navigating_parse_tree.rb
CHANGED
@@ -12,21 +12,21 @@ headings = ["text", "dep", "head text", "head pos", "children"]
 rows = []
 
 doc.each do |token|
-  rows << [token.text, token.
+  rows << [token.text, token.dep, token.head.text, token.head.pos, token.children.map(&:text).join(", ")]
 end
 
 table = Terminal::Table.new rows: rows, headings: headings
 puts table
 
 # Lemmatizer mode: rule
-#
-# | text | dep | head text | head pos | children
-#
-# | Autonomous | amod | cars | NOUN |
-# | cars | nsubj | shift | VERB |
-# | shift | ROOT | shift | VERB |
-# | insurance | compound | liability | NOUN |
-# | liability | dobj | shift | VERB |
-# | toward | prep | shift | VERB |
-# | manufacturers | pobj | toward | ADP |
-#
+# +---------------+----------+-----------+----------+-------------------------+
+# | text          | dep      | head text | head pos | children                |
+# +---------------+----------+-----------+----------+-------------------------+
+# | Autonomous    | amod     | cars      | NOUN     |                         |
+# | cars          | nsubj    | shift     | VERB     | Autonomous              |
+# | shift         | ROOT     | shift     | VERB     | cars, liability, toward |
+# | insurance     | compound | liability | NOUN     |                         |
+# | liability     | dobj     | shift     | VERB     | insurance               |
+# | toward        | prep     | shift     | VERB     | manufacturers           |
+# | manufacturers | pobj     | toward    | ADP      |                         |
+# +---------------+----------+-----------+----------+-------------------------+
data/examples/linguistic_features/noun_chunks.rb
CHANGED
@@ -12,7 +12,7 @@ headings = ["text", "root.text", "root.dep", "root.head.text"]
 rows = []
 
 doc.noun_chunks.each do |chunk|
-  rows << [chunk.text, chunk.root.text, chunk.root.
+  rows << [chunk.text, chunk.root.text, chunk.root.dep, chunk.root.head.text]
 end
 
 table = Terminal::Table.new rows: rows, headings: headings
data/examples/linguistic_features/pos_tagging.rb
CHANGED
@@ -8,7 +8,7 @@ headings = ["text", "lemma", "pos", "tag", "dep", "shape", "is_alpha", "is_stop"
 rows = []
 
 doc.each do |token|
-  rows << [token.text, token.
+  rows << [token.text, token.lemma, token.pos, token.tag, token.dep, token.shape, token.is_alpha, token.is_stop]
 end
 
 table = Terminal::Table.new rows: rows, headings: headings
data/examples/linguistic_features/retokenize_1.rb
CHANGED
@@ -12,7 +12,7 @@ rows = []
 doc.retokenize(doc[4].left_edge.i, doc[4].right_edge.i)
 
 doc.each do |token|
-  rows << [token.text, token.
+  rows << [token.text, token.pos, token.dep, token.head.text]
 end
 
 table = Terminal::Table.new rows: rows, headings: headings
data/examples/linguistic_features/retokenize_2.rb
CHANGED
@@ -6,11 +6,11 @@ nlp = Spacy::Language.new("en_core_web_sm")
 sentence = "I live in New York"
 doc = nlp.read(sentence)
 
-puts "Before: " + doc.tokens.
+puts "Before: " + doc.tokens.map(&:text).join(", ")
 
 doc.retokenize(3, 4)
 
-puts "After: " + doc.tokens.
+puts "After: " + doc.tokens.map(&:text).join(", ")
 
 # Before: I, live, in, New, York
 # After: I, live, in, New York
data/examples/linguistic_features/rule_based_morphology.rb
CHANGED
@@ -6,7 +6,7 @@ nlp = Spacy::Language.new("en_core_web_sm")
 doc = nlp.read("Where are you?")
 
 puts "Morph features of the third word: " + doc[2].morph.to_s
-puts "POS of the third word: " + doc[2].
+puts "POS of the third word: " + doc[2].pos
 
 # Morph features of the third word: Case=Nom|Person=2|PronType=Prs
 # POS of the third word: PRON
data/examples/linguistic_features/similarity_between_lexemes.rb
ADDED
@@ -0,0 +1,18 @@
+require "ruby-spacy"
+require "terminal-table"
+
+nlp = Spacy::Language.new("en_core_web_lg")
+
+orange = nlp.vocab("orange")
+lemon = nlp.vocab("lemon")
+
+book = nlp.vocab("book")
+magazine = nlp.vocab("magazine")
+
+puts "orange <=> lemon: #{orange.similarity(lemon)}"
+puts "book <=> magazine: #{book.similarity(magazine)}"
+puts "orange <=> book: #{orange.similarity(book)}"
+
+# orange <=> lemon: 0.7080526351928711
+# book <=> magazine: 0.4355940818786621
+# orange <=> book: 0.12197211384773254
data/examples/rule_based_matching/creating_spans_from_matches.rb
CHANGED
@@ -10,7 +10,7 @@ matches = matcher.match(doc)
 
 matches.each do |match|
   span = Spacy::Span.new(doc, start_index: match[:start_index], end_index: match[:end_index], options: {label: match[:match_id]})
-  puts span.text + " / " + span.
+  puts span.text + " / " + span.label
 end
 
 # Barack Obama / US_PRESIDENT
data/lib/ruby-spacy.rb
CHANGED
@@ -165,6 +165,9 @@ module Spacy
       # so that ents canbe "each"-ed in Ruby
       ent_array = []
       PyCall::List.(@py_doc.ents).each do |ent|
+        ent.define_singleton_method :label do
+          return self.label_
+        end
         ent_array << ent
       end
       ent_array
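This hunk is what lets the revised examples above call `ent.label` instead of `ent.label_`: each Python entity span gets a singleton `label` method that proxies to spaCy's `label_` attribute. A minimal sketch of the call site, assuming the `en_core_web_sm` model is installed:

require "ruby-spacy"

nlp = Spacy::Language.new("en_core_web_sm")
doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")

doc.ents.each do |ent|
  # `label` is the new shortcut; `label_` still works via method_missing
  puts ent.text + " / " + ent.label
end
# typically: Apple / ORG, U.K. / GPE, $1 billion / MONEY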
@@ -252,10 +255,16 @@ module Spacy
     # @param text [String] A text string representing a lexeme
     # @return [Object] Python `Lexeme` object (https://spacy.io/api/lexeme)
     def get_lexeme(text)
-      text = text.gsub("'", "\'")
       @py_nlp.vocab[text]
     end
 
+    # Returns a ruby lexeme object
+    # @param text [String] a text string representing the vocabulary item
+    # @return [Lexeme]
+    def vocab(text)
+      Lexeme.new(@py_nlp.vocab[text])
+    end
+
     # Returns _n_ lexemes having the vector representations that are the most similar to a given vector representation of a word.
     # @param vector [Object] A vector representation of a word (whether existing or non-existing)
     # @return [Array<Hash{:key => Integer, :text => String, :best_rows => Array<Float>, :score => Float}>] An array of hash objects each contains the `key`, `text`, `best_row` and similarity `score` of a lexeme
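The new `vocab` method is what the revised lexeme.rb example and the new similarity_between_lexemes.rb example call: unlike `get_lexeme`, which returns the raw Python object, it wraps the result in the Ruby `Lexeme` class introduced later in this diff. A sketch, assuming a model with word vectors such as `en_core_web_lg`:

require "ruby-spacy"

nlp = Spacy::Language.new("en_core_web_lg")

orange = nlp.vocab("orange")  # a Spacy::Lexeme, not a raw PyCall object
puts orange.text              # => "orange"
puts orange.shape             # => "xxxx"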
@@ -386,18 +395,24 @@ module Spacy
       chunk_array = []
       py_chunks = PyCall::List.(@py_span.noun_chunks)
       py_chunks.each do |py_span|
-        chunk_array <<
+        chunk_array << Span.new(@doc, py_span: py_span)
       end
       chunk_array
     end
 
+    # Returns the head token
+    # @return [Token]
+    def root
+      Token.new(@py_span.root)
+    end
+
     # Returns an array of spans that represents sentences.
     # @return [Array<Span>]
     def sents
       sentence_array = []
       py_sentences = PyCall::List.(@py_span.sents)
       py_sentences.each do |py_span|
-        sentence_array <<
+        sentence_array << Span.new(@doc, py_span: py_span)
       end
       sentence_array
     end
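Besides filling in the `Span` wrapping for noun chunks and sentences, this hunk adds `Span#root`, so a chunk's syntactic head comes back as a wrapped `Token`. A sketch of the intended usage, borrowing the sentence from the navigating_parse_tree.rb example above:

require "ruby-spacy"

nlp = Spacy::Language.new("en_core_web_sm")
doc = nlp.read("Autonomous cars shift insurance liability toward manufacturers")

chunk = doc.noun_chunks.first  # "Autonomous cars"
puts chunk.root.text           # => "cars"
puts chunk.root.head.text      # => "shift" (Token#head is added further down)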
@@ -407,7 +422,7 @@ module Spacy
     def ents
       ent_array = []
       PyCall::List.(@py_span.ents).each do |py_span|
-        ent_array <<
+        ent_array << Span.new(@doc, py_span: py_span)
       end
       ent_array
     end
@@ -416,7 +431,7 @@ module Spacy
     # @return [Span]
     def sent
       py_span = @py_span.sent
-      return
+      return Span.new(@doc, py_span: py_span)
     end
 
     # Returns a span if a range object is given or a token if an integer representing the position of the doc is given.
|
@@ -424,9 +439,9 @@ module Spacy
|
|
424
439
|
def [](range)
|
425
440
|
if range.is_a?(Range)
|
426
441
|
py_span = @py_span[range]
|
427
|
-
return
|
442
|
+
return Span.new(@doc, start_index: py_span.start, end_index: py_span.end - 1)
|
428
443
|
else
|
429
|
-
return
|
444
|
+
return Token.new(@py_span[range])
|
430
445
|
end
|
431
446
|
end
|
432
447
|
|
@@ -440,7 +455,7 @@ module Spacy
     # Creates a document instance from the span
     # @return [Doc]
     def as_doc
-
+      Doc.new(@doc.py_nlp, text: self.text)
     end
 
     # Returns tokens conjugated to the root of the span.
@@ -448,7 +463,7 @@ module Spacy
     def conjuncts
       conjunct_array = []
       PyCall::List.(@py_span.conjuncts).each do |py_conjunct|
-        conjunct_array <<
+        conjunct_array << Token.new(py_conjunct)
       end
       conjunct_array
     end
@@ -458,7 +473,7 @@ module Spacy
     def lefts
       left_array = []
       PyCall::List.(@py_span.lefts).each do |py_left|
-        left_array <<
+        left_array << Token.new(py_left)
       end
       left_array
     end
@@ -468,7 +483,7 @@ module Spacy
     def rights
       right_array = []
       PyCall::List.(@py_span.rights).each do |py_right|
-        right_array <<
+        right_array << Token.new(py_right)
      end
       right_array
     end
@@ -478,11 +493,17 @@ module Spacy
     def subtree
       subtree_array = []
       PyCall::List.(@py_span.subtree).each do |py_subtree|
-        subtree_array <<
+        subtree_array << Token.new(py_subtree)
       end
       subtree_array
     end
 
+    # Returns the label
+    # @return [String]
+    def label
+      @py_span.label_
+    end
+
     # Methods defined in Python but not wrapped in ruby-spacy can be called by this dynamic method handling mechanism.
     def method_missing(name, *args)
       @py_span.send(name, *args)
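`Span#label` mirrors the singleton method patched onto `Doc#ents` earlier and is what the revised creating_spans_from_matches.rb example calls. A sketch reusing that example's span construction; the sentence and the `PERSON_NAME` label here are made up for illustration:

require "ruby-spacy"

nlp = Spacy::Language.new("en_core_web_sm")
doc = nlp.read("Barack Obama was the 44th president of the United States")

# span over tokens 0..1 ("Barack Obama") tagged with a custom label
span = Spacy::Span.new(doc, start_index: 0, end_index: 1, options: {label: "PERSON_NAME"})
puts span.text   # => "Barack Obama"
puts span.label  # => "PERSON_NAME"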
@@ -506,52 +527,59 @@ module Spacy
       @text = @py_token.text
     end
 
+
+    # Returns the head token
+    # @return [Token]
+    def head
+      Token.new(@py_token.head)
+    end
+
     # Returns the token in question and the tokens that descend from it.
-    # @return [Array<
+    # @return [Array<Token>] an array of tokens
     def subtree
       descendant_array = []
       PyCall::List.(@py_token.subtree).each do |descendant|
-        descendant_array << descendant
+        descendant_array << Token.new(descendant)
       end
       descendant_array
     end
 
     # Returns the token's ancestors.
-    # @return [Array<
+    # @return [Array<Token>] an array of tokens
     def ancestors
       ancestor_array = []
       PyCall::List.(@py_token.ancestors).each do |ancestor|
-        ancestor_array << ancestor
+        ancestor_array << Token.new(ancestor)
       end
       ancestor_array
     end
 
     # Returns a sequence of the token's immediate syntactic children.
-    # @return [Array<
+    # @return [Array<Token>] an array of tokens
     def children
       child_array = []
       PyCall::List.(@py_token.children).each do |child|
-        child_array << child
+        child_array << Token.new(child)
       end
       child_array
     end
 
     # The leftward immediate children of the word in the syntactic dependency parse.
-    # @return [Array<
+    # @return [Array<Token>] an array of tokens
     def lefts
       token_array = []
       PyCall::List.(@py_token.lefts).each do |token|
-        token_array << token
+        token_array << Token.new(token)
      end
       token_array
     end
 
     # The rightward immediate children of the word in the syntactic dependency parse.
-    # @return [Array<
+    # @return [Array<Token>] an array of tokens
     def rights
       token_array = []
       PyCall::List.(@py_token.rights).each do |token|
-        token_array << token
+        token_array << Token.new(token)
       end
       token_array
     end
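With `head`, `subtree`, `ancestors`, `children`, `lefts`, and `rights` all returning wrapped `Token` objects rather than raw PyCall handles, tree navigation now composes without `Spacy::generator_to_array`. A sketch using the sentence from iterating_lefts_and_rights.rb:

require "ruby-spacy"

nlp = Spacy::Language.new("en_core_web_sm")
doc = nlp.read("bright red apples on the tree")
apples = doc[2]

puts apples.children.map(&:text).join(", ")  # => "bright, red, on"
puts apples.lefts.map(&:text).join(", ")     # => "bright, red"
puts doc[5].head.text                        # head of "tree" => "on"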
@@ -582,12 +610,143 @@ module Spacy
       end
     end
 
+    # Returns the lemma by calling `lemma_' of `@py_token` object
+    # @return [String]
+    def lemma
+      @py_token.lemma_
+    end
+
+    # Returns the lowercase form by calling `lower_' of `@py_token` object
+    # @return [String]
+    def lower
+      @py_token.lower_
+    end
+
+    # Returns the shape (e.g. "Xxxxx") by calling `shape_' of `@py_token` object
+    # @return [String]
+    def shape
+      @py_token.shape_
+    end
+
+    # Returns the pos by calling `pos_' of `@py_token` object
+    # @return [String]
+    def pos
+      @py_token.pos_
+    end
+
+    # Returns the fine-grained pos by calling `tag_' of `@py_token` object
+    # @return [String]
+    def tag
+      @py_token.tag_
+    end
+
+    # Returns the dependency relation by calling `dep_' of `@py_token` object
+    # @return [String]
+    def dep
+      @py_token.dep_
+    end
+
+    # Returns the language by calling `lang_' of `@py_token` object
+    # @return [String]
+    def lang
+      @py_token.lang_
+    end
+
+    # Returns the trailing space character if present by calling `whitespace_' of `@py_token` object
+    # @return [String]
+    def whitespace
+      @py_token.whitespace_
+    end
+
+    # Returns the named entity type by calling `ent_type_' of `@py_token` object
+    # @return [String]
+    def ent_type
+      @py_token.ent_type_
+    end
+
+    # Returns a lexeme object
+    # @return [Lexeme]
+    def lexeme
+      Lexeme.new(@py_token.lex)
+    end
+
     # Methods defined in Python but not wrapped in ruby-spacy can be called by this dynamic method handling mechanism.
     def method_missing(name, *args)
       @py_token.send(name, *args)
     end
   end
 
+  # See also spaCy Python API document for [`Lexeme`](https://spacy.io/api/lexeme).
+  class Lexeme
+
+    # @return [Object] a Python `Lexeme` instance accessible via `PyCall`
+    attr_reader :py_lexeme
+
+    # @return [String] a string representing the token
+    attr_reader :text
+
+    # It is recommended to use {Language#vocab} or {Token#lexeme} methods to create tokens.
+    # There is no way to generate a lexeme from scratch but relying on a pre-exising Python {Lexeme} object.
+    # @param py_lexeme [Object] Python `Lexeme` object
+    def initialize(py_lexeme)
+      @py_lexeme = py_lexeme
+      @text = @py_lexeme.text
+    end
+
+    # String representation of the token.
+    # @return [String]
+    def to_s
+      @text
+    end
+
+    # Returns the lowercase form by calling `lower_' of `@py_lexeme` object
+    # @return [String]
+    def lower
+      @py_lexeme.lower_
+    end
+
+    # Returns the shape (e.g. "Xxxxx") by calling `shape_' of `@py_lexeme` object
+    # @return [String]
+    def shape
+      @py_lexeme.shape_
+    end
+
+    # Returns the language by calling `lang_' of `@py_lexeme` object
+    # @return [String]
+    def lang
+      @py_lexeme.lang_
+    end
+
+    # Returns the length-N substring from the start of the word by calling `prefix_' of `@py_lexeme` object
+    # @return [String]
+    def prefix
+      @py_lexeme.prefix_
+    end
+    #
+    # Returns the length-N substring from the end of the word by calling `suffix_' of `@py_lexeme` object
+    # @return [String]
+    def suffix
+      @py_lexeme.suffix_
+    end
+
+    # Returns the lexemes's norm, i.e. a normalized form of the lexeme calling `norm_' of `@py_lexeme` object
+    # @return [String]
+    def norm
+      @py_lexeme.norm_
+    end
+
+    # Returns a semantic similarity estimate.
+    # @param other [Lexeme] the other doc to which a similarity estimation is made
+    # @return [Float]
+    def similarity(other)
+      @py_lexeme.similarity(other.py_lexeme)
+    end
+
+    # Methods defined in Python but not wrapped in ruby-spacy can be called by this dynamic method handling mechanism.
+    def method_missing(name, *args)
+      @py_lexeme.send(name, *args)
+    end
+  end
 
 end
 
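Together with the `Token` readers above, the new `Lexeme` class backs `Language#vocab` and `Token#lexeme`, so underscore-free attribute access now covers tokens, spans, and lexemes alike. A closing sketch, assuming `en_core_web_lg` for the vectors:

require "ruby-spacy"

nlp = Spacy::Language.new("en_core_web_lg")
doc = nlp.read("Apples taste good.")

token = doc[0]
puts token.lemma  # => "apple" (was token.lemma_ in 0.1.3)
puts token.pos    # => "NOUN"

lex = token.lexeme  # a Spacy::Lexeme via the new Token#lexeme
puts lex.lower      # => "apples"
puts lex.similarity(nlp.vocab("oranges"))  # a Float similarity score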
data/lib/ruby-spacy/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ruby-spacy
 version: !ruby/object:Gem::Version
-  version: 0.1.3
+  version: 0.1.4
 platform: ruby
 authors:
 - Yoichiro Hasebe
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2021-
+date: 2021-07-03 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: pycall
@@ -123,6 +123,7 @@ files:
 - examples/linguistic_features/rule_based_morphology.rb
 - examples/linguistic_features/sentence_segmentation.rb
 - examples/linguistic_features/similarity.rb
+- examples/linguistic_features/similarity_between_lexemes.rb
 - examples/linguistic_features/similarity_between_spans.rb
 - examples/linguistic_features/tokenization.rb
 - examples/rule_based_matching/creating_spans_from_matches.rb
@@ -149,7 +150,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 - !ruby/object:Gem::Version
   version: '0'
 requirements: []
-rubygems_version: 3.2.
+rubygems_version: 3.2.11
 signing_key:
 specification_version: 4
 summary: A wrapper module for using spaCy natural language processing library from
|