ruby-spacy 0.1.0 → 0.1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +23 -0
  3. data/Gemfile.lock +3 -1
  4. data/README.md +123 -77
  5. data/examples/get_started/lexeme.rb +2 -2
  6. data/examples/get_started/linguistic_annotations.rb +1 -1
  7. data/examples/get_started/morphology.rb +45 -0
  8. data/examples/get_started/most_similar.rb +28 -27
  9. data/examples/get_started/named_entities.rb +1 -1
  10. data/examples/get_started/pos_tags_and_dependencies.rb +18 -18
  11. data/examples/get_started/similarity.rb +2 -2
  12. data/examples/japanese/ancestors.rb +9 -11
  13. data/examples/japanese/entity_annotations_and_labels.rb +1 -1
  14. data/examples/japanese/lemmatization.rb +1 -1
  15. data/examples/japanese/most_similar.rb +28 -27
  16. data/examples/japanese/named_entity_recognition.rb +1 -1
  17. data/examples/japanese/navigating_parse_tree.rb +18 -18
  18. data/examples/japanese/noun_chunks.rb +1 -1
  19. data/examples/japanese/pos_tagging.rb +20 -20
  20. data/examples/japanese/visualizing_dependencies.rb +2 -2
  21. data/examples/japanese/visualizing_named_entities.rb +1 -1
  22. data/examples/linguistic_features/ancestors.rb +13 -10
  23. data/examples/linguistic_features/entity_annotations_and_labels.rb +1 -1
  24. data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +2 -2
  25. data/examples/linguistic_features/information_extraction.rb +2 -2
  26. data/examples/linguistic_features/iterating_children.rb +2 -2
  27. data/examples/linguistic_features/iterating_lefts_and_rights.rb +5 -5
  28. data/examples/linguistic_features/lemmatization.rb +1 -1
  29. data/examples/linguistic_features/named_entity_recognition.rb +1 -1
  30. data/examples/linguistic_features/navigating_parse_tree.rb +12 -12
  31. data/examples/linguistic_features/noun_chunks.rb +1 -1
  32. data/examples/linguistic_features/pos_tagging.rb +1 -1
  33. data/examples/linguistic_features/retokenize_1.rb +1 -1
  34. data/examples/linguistic_features/retokenize_2.rb +2 -2
  35. data/examples/linguistic_features/rule_based_morphology.rb +1 -1
  36. data/examples/linguistic_features/similarity.rb +2 -2
  37. data/examples/linguistic_features/similarity_between_lexemes.rb +18 -0
  38. data/examples/linguistic_features/similarity_between_spans.rb +2 -2
  39. data/examples/rule_based_matching/creating_spans_from_matches.rb +1 -1
  40. data/lib/ruby-spacy.rb +493 -300
  41. data/lib/ruby-spacy/version.rb +1 -1
  42. data/ruby-spacy.gemspec +1 -1
  43. metadata +6 -5
  44. data/examples/linguistic_features/morphology.rb +0 -17
  45. data/examples/linguistic_features/special_case_tokenization_rules.rb +0 -19
@@ -8,7 +8,7 @@ headings = ["text", "start_char", "end_char", "label"]
8
8
  rows = []
9
9
 
10
10
  doc.ents.each do |ent|
11
- rows << [ent.text, ent.start_char, ent.end_char, ent.label_]
11
+ rows << [ent.text, ent.start_char, ent.end_char, ent.label]
12
12
  end
13
13
 
14
14
  table = Terminal::Table.new rows: rows, headings: headings
@@ -2,30 +2,30 @@ require "ruby-spacy"
2
2
  require "terminal-table"
3
3
 
4
4
  nlp = Spacy::Language.new("en_core_web_sm")
5
- doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
5
+ doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion.")
6
6
 
7
- headings = ["text", "lemma", "pos", "tag", "dep", "shape", "is_alpha", "is_stop"]
7
+ headings = ["text", "lemma", "pos", "tag", "dep"]
8
8
  rows = []
9
9
 
10
10
  doc.each do |token|
11
- rows << [token.text, token.lemma_, token.pos_, token.tag_, token.dep_, token.shape_, token.is_alpha, token.is_stop]
11
+ rows << [token.text, token.lemma, token.pos, token.tag, token.dep]
12
12
  end
13
13
 
14
14
  table = Terminal::Table.new rows: rows, headings: headings
15
15
  puts table
16
16
 
17
- # +---------+---------+-------+-----+----------+-------+----------+---------+
18
- # | text | lemma | pos | tag | dep | shape | is_alpha | is_stop |
19
- # +---------+---------+-------+-----+----------+-------+----------+---------+
20
- # | Apple | Apple | PROPN | NNP | nsubj | Xxxxx | true | false |
21
- # | is | be | AUX | VBZ | aux | xx | true | true |
22
- # | looking | look | VERB | VBG | ROOT | xxxx | true | false |
23
- # | at | at | ADP | IN | prep | xx | true | true |
24
- # | buying | buy | VERB | VBG | pcomp | xxxx | true | false |
25
- # | U.K. | U.K. | PROPN | NNP | dobj | X.X. | false | false |
26
- # | startup | startup | NOUN | NN | advcl | xxxx | true | false |
27
- # | for | for | ADP | IN | prep | xxx | true | true |
28
- # | $ | $ | SYM | $ | quantmod | $ | false | false |
29
- # | 1 | 1 | NUM | CD | compound | d | false | false |
30
- # | billion | billion | NUM | CD | pobj | xxxx | true | false |
31
- # +---------+---------+-------+-----+----------+-------+----------+---------+
17
+ # +---------+---------+-------+-----+----------+
18
+ # | text | lemma | pos | tag | dep |
19
+ # +---------+---------+-------+-----+----------+
20
+ # | Apple | Apple | PROPN | NNP | nsubj |
21
+ # | is | be | AUX | VBZ | aux |
22
+ # | looking | look | VERB | VBG | ROOT |
23
+ # | at | at | ADP | IN | prep |
24
+ # | buying | buy | VERB | VBG | pcomp |
25
+ # | U.K. | U.K. | PROPN | NNP | dobj |
26
+ # | startup | startup | NOUN | NN | advcl |
27
+ # | for | for | ADP | IN | prep |
28
+ # | $ | $ | SYM | $ | quantmod |
29
+ # | 1 | 1 | NUM | CD | compound |
30
+ # | billion | billion | NUM | CD | pobj |
31
+ # +---------+---------+-------+-----+----------+
@@ -4,8 +4,8 @@ nlp = Spacy::Language.new("en_core_web_lg")
4
4
  doc1 = nlp.read("I like salty fries and hamburgers.")
5
5
  doc2 = nlp.read("Fast food tastes very good.")
6
6
 
7
- puts "Doc 1: " + doc1
8
- puts "Doc 2: " + doc2
7
+ puts "Doc 1: " + doc1.text
8
+ puts "Doc 2: " + doc2.text
9
9
  puts "Similarity: #{doc1.similarity(doc2)}"
10
10
 
11
11
  # Doc 1: I like salty fries and hamburgers.
@@ -23,9 +23,7 @@ puts "The root of the sentence is: " + root.text
23
23
  puts "The subject of the sentence is: " + subject.text
24
24
 
25
25
  subject.subtree.each do |descendant|
26
- # need to convert "ancestors" object from a python generator to a ruby array
27
- ancestors = Spacy::generator_to_array(descendant.ancestors)
28
- rows << [descendant.text, descendant.dep_, descendant.n_lefts, descendant.n_rights, ancestors]
26
+ rows << [descendant.text, descendant.dep, descendant.n_lefts, descendant.n_rights, descendant.ancestors.map(&:text).join(", ")]
29
27
  end
30
28
 
31
29
  table = Terminal::Table.new rows: rows, headings: headings
@@ -34,11 +32,11 @@ puts table
34
32
  # The sentence: 私の父は寿司が好きだ。
35
33
  # The root of the sentence is: 好き
36
34
  # The subject of the sentence is: 父
37
- # +------+------------+---------+----------+----------------+
38
- # | text | dep | n_lefts | n_rights | ancestors |
39
- # +------+------------+---------+----------+----------------+
40
- # | 私 | nmod | 0 | 1 | [父, 好き] |
41
- # | の | case | 0 | 0 | [私, 父, 好き] |
42
- # | 父 | dislocated | 1 | 1 | [好き] |
43
- # | は | case | 0 | 0 | [父, 好き] |
44
- # +------+------------+---------+----------+----------------+
35
+ # +------+------------+---------+----------+--------------+
36
+ # | text | dep | n_lefts | n_rights | ancestors |
37
+ # +------+------------+---------+----------+--------------+
38
+ # | 私 | nmod | 0 | 1 | 父, 好き |
39
+ # | の | case | 0 | 0 | 私, 父, 好き |
40
+ # | 父 | dislocated | 1 | 1 | 好き |
41
+ # | は | case | 0 | 0 | 父, 好き |
42
+ # +------+------------+---------+----------+--------------+
@@ -10,7 +10,7 @@ headings = ["text", "ent_iob", "ent_iob_", "ent_type_"]
10
10
  rows = []
11
11
 
12
12
  doc.each do |ent|
13
- rows << [ent.text, ent.ent_iob, ent.ent_iob_, ent.ent_type_]
13
+ rows << [ent.text, ent.ent_iob, ent.ent_iob_, ent.ent_type]
14
14
  end
15
15
 
16
16
  table = Terminal::Table.new rows: rows, headings: headings
@@ -9,7 +9,7 @@ headings = ["text", "lemma"]
9
9
  rows = []
10
10
 
11
11
  doc.each do |token|
12
- rows << [token.text, token.lemma_]
12
+ rows << [token.text, token.lemma]
13
13
  end
14
14
 
15
15
  table = Terminal::Table.new rows: rows, headings: headings
@@ -9,38 +9,39 @@ france = nlp.get_lexeme("フランス")
9
9
 
10
10
  query = tokyo.vector - japan.vector + france.vector
11
11
 
12
- headings = ["key", "text", "score"]
12
+ headings = ["rank", "text", "score"]
13
13
  rows = []
14
14
 
15
15
  results = nlp.most_similar(query, 20)
16
- results.each do |lexeme|
17
- rows << [lexeme[:key], lexeme[:text], lexeme[:score],]
16
+ results.each_with_index do |lexeme, i|
17
+ index = (i + 1).to_s
18
+ rows << [index, lexeme.text, lexeme.score]
18
19
  end
19
20
 
20
21
  table = Terminal::Table.new rows: rows, headings: headings
21
22
  puts table
22
23
 
23
- # +----------------------+----------------+--------------------+
24
- # | key | text | score |
25
- # +----------------------+----------------+--------------------+
26
- # | 12090003238699662352 | パリ | 0.7376999855041504 |
27
- # | 18290786970454458111 | フランス | 0.7221999764442444 |
28
- # | 9360021637096476946 | 東京 | 0.6697999835014343 |
29
- # | 2437546359230213520 | ストラスブール | 0.631600022315979 |
30
- # | 13988178952745813186 | リヨン | 0.5939000248908997 |
31
- # | 10427160276079242800 | Paris | 0.574400007724762 |
32
- # | 5562396768860926997 | ベルギー | 0.5683000087738037 |
33
- # | 15029176915627965481 | ニース | 0.5679000020027161 |
34
- # | 9750625950625019690 | アルザス | 0.5644999742507935 |
35
- # | 2381640614569534741 | 南仏 | 0.5547999739646912 |
36
- # | 7486004458946554189 | ロンドン | 0.5525000095367432 |
37
- # | 7457654095417343716 | モンマルトル | 0.5453000068664551 |
38
- # | 14063777960246535660 | ブローニュ | 0.5338000059127808 |
39
- # | 3297880777656467136 | トゥールーズ | 0.5275999903678894 |
40
- # | 3059066136348671923 | バスティーユ | 0.5213000178337097 |
41
- # | 2423471048892368989 | フランス人 | 0.5194000005722046 |
42
- # | 15944886306236465675 | ロレーヌ | 0.5148000121116638 |
43
- # | 9592561648283566590 | モンパルナス | 0.513700008392334 |
44
- # | 6560045335275831141 | 渡仏 | 0.5131000280380249 |
45
- # | 8597467336360225096 | イタリア | 0.5127000212669373 |
46
- # +----------------------+----------------+--------------------+
24
+ # +------+----------------+--------------------+
25
+ # | rank | text | score |
26
+ # +------+----------------+--------------------+
27
+ # | 1 | パリ | 0.7376999855041504 |
28
+ # | 2 | フランス | 0.7221999764442444 |
29
+ # | 3 | 東京 | 0.6697999835014343 |
30
+ # | 4 | ストラスブール | 0.631600022315979 |
31
+ # | 5 | リヨン | 0.5939000248908997 |
32
+ # | 6 | Paris | 0.574400007724762 |
33
+ # | 7 | ベルギー | 0.5683000087738037 |
34
+ # | 8 | ニース | 0.5679000020027161 |
35
+ # | 9 | アルザス | 0.5644999742507935 |
36
+ # | 10 | 南仏 | 0.5547999739646912 |
37
+ # | 11 | ロンドン | 0.5525000095367432 |
38
+ # | 12 | モンマルトル | 0.5453000068664551 |
39
+ # | 13 | ブローニュ | 0.5338000059127808 |
40
+ # | 14 | トゥールーズ | 0.5275999903678894 |
41
+ # | 15 | バスティーユ | 0.5213000178337097 |
42
+ # | 16 | フランス人 | 0.5194000005722046 |
43
+ # | 17 | ロレーヌ | 0.5148000121116638 |
44
+ # | 18 | モンパルナス | 0.513700008392334 |
45
+ # | 19 | 渡仏 | 0.5131000280380249 |
46
+ # | 20 | イタリア | 0.5127000212669373 |
47
+ # +------+----------------+--------------------+
@@ -10,7 +10,7 @@ headings = ["text", "start", "end", "label"]
10
10
  rows = []
11
11
 
12
12
  doc.ents.each do |ent|
13
- rows << [ent.text, ent.start_char, ent.end_char, ent.label_]
13
+ rows << [ent.text, ent.start_char, ent.end_char, ent.label]
14
14
  end
15
15
 
16
16
  table = Terminal::Table.new rows: rows, headings: headings
@@ -9,26 +9,26 @@ headings = ["text", "dep", "head text", "head pos", "children"]
9
9
  rows = []
10
10
 
11
11
  doc.each do |token|
12
- rows << [token.text, token.dep_, token.head.text, token.head.pos_, token.children.to_s]
12
+ rows << [token.text, token.dep, token.head.text, token.head.pos, token.children.map(&:text).join(", ")]
13
13
  end
14
14
 
15
15
  table = Terminal::Table.new rows: rows, headings: headings
16
16
  puts table
17
17
 
18
- # +------+----------+-----------+----------+--------------------------+
19
- # | text | dep | head text | head pos | children |
20
- # +------+----------+-----------+----------+--------------------------+
21
- # | 自動 | compound | 車 | NOUN | [] |
22
- # | 運転 | compound | 車 | NOUN | [] |
23
- # | 車 | nsubj | 転嫁 | VERB | [自動, 運転, は] |
24
- # | は | case | 車 | NOUN | [] |
25
- # | 保険 | compound | 責任 | NOUN | [] |
26
- # | 責任 | obj | 転嫁 | VERB | [保険, を] |
27
- # | を | case | 責任 | NOUN | [] |
28
- # | 製造 | compound | 者 | NOUN | [] |
29
- # | 者 | obl | 転嫁 | VERB | [製造, に] |
30
- # | に | case | 者 | NOUN | [] |
31
- # | 転嫁 | ROOT | 転嫁 | VERB | [車, 責任, 者, する, 。] |
32
- # | する | aux | 転嫁 | VERB | [] |
33
- # | 。 | punct | 転嫁 | VERB | [] |
34
- # +------+----------+-----------+----------+--------------------------+
18
+ +------+----------+-----------+----------+------------------------+
19
+ | text | dep | head text | head pos | children |
20
+ +------+----------+-----------+----------+------------------------+
21
+ | 自動 | compound | 車 | 92 | |
22
+ | 運転 | compound | 車 | 92 | |
23
+ | 車 | nsubj | 転嫁 | 100 | 自動, 運転, は |
24
+ | は | case | 車 | 92 | |
25
+ | 保険 | compound | 責任 | 92 | |
26
+ | 責任 | obj | 転嫁 | 100 | 保険, を |
27
+ | を | case | 責任 | 92 | |
28
+ | 製造 | compound | 者 | 92 | |
29
+ | 者 | obl | 転嫁 | 100 | 製造, に |
30
+ | に | case | 者 | 92 | |
31
+ | 転嫁 | ROOT | 転嫁 | 100 | 車, 責任, 者, する, 。 |
32
+ | する | aux | 転嫁 | 100 | |
33
+ | 。 | punct | 転嫁 | 100 | |
34
+ +------+----------+-----------+----------+------------------------+
@@ -9,7 +9,7 @@ headings = ["text", "root.text", "root.dep", "root.head.text"]
9
9
  rows = []
10
10
 
11
11
  doc.noun_chunks.each do |chunk|
12
- rows << [chunk.text, chunk.root.text, chunk.root.dep_, chunk.root.head.text]
12
+ rows << [chunk.text, chunk.root.text, chunk.root.dep, chunk.root.head.text]
13
13
  end
14
14
 
15
15
  table = Terminal::Table.new rows: rows, headings: headings
@@ -4,31 +4,31 @@ require "terminal-table"
4
4
  nlp = Spacy::Language.new("ja_core_news_lg")
5
5
  doc = nlp.read("任天堂は1983年にファミコンを14,800円で発売した。")
6
6
 
7
- headings = ["text", "lemma", "pos", "tag", "dep", "shape", "is_alpha", "is_stop"]
7
+ headings = ["text", "lemma", "pos", "tag", "dep"]
8
8
  rows = []
9
9
 
10
10
  doc.each do |token|
11
- rows << [token.text, token.lemma_, token.pos_, token.tag_, token.dep_, token.shape_, token.is_alpha, token.is_stop]
11
+ rows << [token.text, token.lemma, token.pos, token.tag, token.dep]
12
12
  end
13
13
 
14
14
  table = Terminal::Table.new rows: rows, headings: headings
15
15
  puts table
16
16
 
17
- # +------------+------------+-------+--------------------------+--------+--------+----------+---------+
18
- # | text | lemma | pos | tag | dep | shape | is_alpha | is_stop |
19
- # +------------+------------+-------+--------------------------+--------+--------+----------+---------+
20
- # | 任天堂 | 任天堂 | PROPN | 名詞-固有名詞-一般 | nsubj | xxx | true | false |
21
- # | は | は | ADP | 助詞-係助詞 | case | x | true | true |
22
- # | 1983 | 1983 | NUM | 名詞-数詞 | nummod | dddd | false | false |
23
- # | 年 | 年 | NOUN | 名詞-普通名詞-助数詞可能 | obl | x | true | false |
24
- # | に | に | ADP | 助詞-格助詞 | case | x | true | true |
25
- # | ファミコン | ファミコン | NOUN | 名詞-普通名詞-一般 | obj | xxxx | true | false |
26
- # | を | を | ADP | 助詞-格助詞 | case | x | true | true |
27
- # | 14,800 | 14,800 | NUM | 名詞-数詞 | fixed | dd,ddd | false | false |
28
- # | 円 | 円 | NOUN | 名詞-普通名詞-助数詞可能 | obl | x | true | false |
29
- # | で | で | ADP | 助詞-格助詞 | case | x | true | true |
30
- # | 発売 | 発売 | VERB | 名詞-普通名詞-サ変可能 | ROOT | xx | true | false |
31
- # | し | する | AUX | 動詞-非自立可能 | aux | x | true | true |
32
- # | た | た | AUX | 助動詞 | aux | x | true | true |
33
- # | 。 | 。 | PUNCT | 補助記号-句点 | punct | 。 | false | false |
34
- # +------------+------------+-------+--------------------------+--------+--------+----------+---------+
17
+ # +------------+------------+-------+--------------------------+--------+
18
+ # | text | lemma | pos | tag | dep |
19
+ # +------------+------------+-------+--------------------------+--------+
20
+ # | 任天堂 | 任天堂 | PROPN | 名詞-固有名詞-一般 | nsubj |
21
+ # | は | は | ADP | 助詞-係助詞 | case |
22
+ # | 1983 | 1983 | NUM | 名詞-数詞 | nummod |
23
+ # | 年 | 年 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
24
+ # | に | に | ADP | 助詞-格助詞 | case |
25
+ # | ファミコン | ファミコン | NOUN | 名詞-普通名詞-一般 | obj |
26
+ # | を | を | ADP | 助詞-格助詞 | case |
27
+ # | 14,800 | 14,800 | NUM | 名詞-数詞 | fixed |
28
+ # | 円 | 円 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
29
+ # | で | で | ADP | 助詞-格助詞 | case |
30
+ # | 発売 | 発売 | VERB | 名詞-普通名詞-サ変可能 | ROOT |
31
+ # | し | する | AUX | 動詞-非自立可能 | aux |
32
+ # | た | た | AUX | 助動詞 | aux |
33
+ # | 。 | 。 | PUNCT | 補助記号-句点 | punct |
34
+ # +------------+------------+-------+--------------------------+--------+
@@ -6,8 +6,8 @@ nlp = Spacy::Language.new("ja_core_news_sm")
6
6
  sentence = "自動運転車は保険責任を製造者に転嫁する。"
7
7
  doc = nlp.read(sentence)
8
8
 
9
- dep_svg = doc.displacy('dep', false)
9
+ dep_svg = doc.displacy(style: 'dep', compact: false)
10
10
 
11
- File.open(File.join(File.dirname(__FILE__), "outputs/test_dep.svg"), "w") do |file|
11
+ File.open(File.join(File.dirname(__FILE__), "test_dep.svg"), "w") do |file|
12
12
  file.write(dep_svg)
13
13
  end
@@ -7,7 +7,7 @@ sentence ="セバスチアン・スランが2007年にグーグルで自動運
7
7
 
8
8
  doc = nlp.read(sentence)
9
9
 
10
- ent_html = doc.displacy('ent')
10
+ ent_html = doc.displacy(style: 'ent')
11
11
 
12
12
  File.open(File.join(File.dirname(__FILE__), "outputs/test_ent.html"), "w") do |file|
13
13
  file.write(ent_html)
@@ -24,18 +24,21 @@ puts "The subject of the sentence is: " + subject.text
24
24
  subject.subtree.each do |descendant|
25
25
  # need to convert "ancestors" object from a python generator to a ruby array
26
26
  ancestors = Spacy::generator_to_array(descendant.ancestors)
27
- rows << [descendant.text, descendant.dep_, descendant.n_lefts, descendant.n_rights, ancestors]
27
+ rows << [descendant.text, descendant.dep, descendant.n_lefts, descendant.n_rights, ancestors.map(&:text).join(", ")]
28
28
  end
29
29
 
30
30
  table = Terminal::Table.new rows: rows, headings: headings
31
31
  print table
32
32
 
33
- # +----------+----------+---------+----------+------------------------------------+
34
- # | text | dep | n_lefts | n_rights | ancestors |
35
- # +----------+----------+---------+----------+------------------------------------+
36
- # | Credit | nmod | 0 | 2 | [holders, submit] |
37
- # | and | cc | 0 | 0 | [Credit, holders, submit] |
38
- # | mortgage | compound | 0 | 0 | [account, Credit, holders, submit] |
39
- # | account | conj | 1 | 0 | [Credit, holders, submit] |
40
- # | holders | nsubj | 1 | 0 | [submit] |
41
- # +----------+----------+---------+----------+------------------------------------+
33
+ # The sentence: Credit and mortgage account holders must submit their requests
34
+ # The root of the sentence is: submit
35
+ # The subject of the sentence is: holders
36
+ # +----------+----------+---------+----------+----------------------------------+
37
+ # | text | dep | n_lefts | n_rights | ancestors |
38
+ # +----------+----------+---------+----------+----------------------------------+
39
+ # | Credit | nmod | 0 | 2 | holders, submit |
40
+ # | and | cc | 0 | 0 | Credit, holders, submit |
41
+ # | mortgage | compound | 0 | 0 | account, Credit, holders, submit |
42
+ # | account | conj | 1 | 0 | Credit, holders, submit |
43
+ # | holders | nsubj | 1 | 0 | submit |
44
+ # +----------+----------+---------+----------+----------------------------------+
@@ -10,7 +10,7 @@ headings = ["text", "ent_iob", "ent_iob_", "ent_type_"]
10
10
  rows = []
11
11
 
12
12
  doc.each do |ent|
13
- rows << [ent.text, ent.ent_iob, ent.ent_iob_, ent.ent_type_]
13
+ rows << [ent.text, ent.ent_iob, ent.ent_iob_, ent.ent_type]
14
14
  end
15
15
 
16
16
  table = Terminal::Table.new rows: rows, headings: headings
@@ -10,11 +10,11 @@ results = []
10
10
 
11
11
  doc.each do |token|
12
12
  if token.dep_ == "nsubj" && token.head.pos_ == "VERB"
13
- results << token.head
13
+ results << token.head.text
14
14
  end
15
15
  end
16
16
 
17
17
  puts results.to_s
18
18
 
19
- # [shift]
19
+ # ["shift"]
20
20
 
@@ -19,11 +19,11 @@ texts.each do |text|
19
19
  doc.each do |token|
20
20
  if token.ent_type_ == "MONEY"
21
21
  if ["attr", "dobj"].index token.dep_
22
- subj = Spacy.generator_to_array(token.head.lefts).select{|t| t.dep_ == "nsubj"}
22
+ subj = Spacy.generator_to_array(token.head.lefts).select{|t| t.dep == "nsubj"}
23
23
  if !subj.empty?
24
24
  puts(subj[0].text + " --> " + token.text)
25
25
  end
26
- elsif token.dep_ == "pobj" and token.head.dep_ == "prep"
26
+ elsif token.dep_ == "pobj" and token.head.dep == "prep"
27
27
  puts token.head.head.text + " --> " + token.text
28
28
  end
29
29
  end
@@ -12,7 +12,7 @@ doc.each do |token|
12
12
  if token.pos_ == "VERB"
13
13
  token.children.each do |child|
14
14
  if child.dep_ == "nsubj"
15
- results << child.head
15
+ results << child.head.text
16
16
  end
17
17
  end
18
18
  end
@@ -20,5 +20,5 @@ end
20
20
 
21
21
  puts results.to_s
22
22
 
23
- # [shift]
23
+ # ["shift"]
24
24
 
@@ -5,16 +5,16 @@ nlp = Spacy::Language.new("en_core_web_sm")
5
5
 
6
6
  doc = nlp.read("bright red apples on the tree")
7
7
 
8
- puts "Text: " + doc
8
+ puts "Text: " + doc.text
9
9
 
10
- puts "Words to the left of 'apple': " + Spacy.generator_to_array(doc[2].lefts).to_s
11
- puts "Words to the right of 'apple': " + Spacy.generator_to_array(doc[2].rights).to_s
10
+ puts "Words to the left of 'apple': " + doc[2].lefts.map(&:text).join(", ")
11
+ puts "Words to the right of 'apple': " + doc[2].rights.map(&:text).join(", ")
12
12
 
13
13
  puts "Num of the words to the left of 'apple': " + doc[2].n_lefts.to_s
14
14
  puts "Num of the words to the right of 'apple': " + doc[2].n_rights.to_s
15
15
 
16
16
  # Text: bright red apples on the tree
17
- # Words to the left of 'apple': [bright, red]
18
- # Words to the right of 'apple': [on]
17
+ # Words to the left of 'apple': bright, red
18
+ # Words to the right of 'apple': on
19
19
  # Num of the words to the left of 'apple': 2
20
20
  # Num of the words to the right of 'apple': 1
@@ -12,7 +12,7 @@ headings = ["lemma"]
12
12
  rows = []
13
13
 
14
14
  doc.each do |token|
15
- rows << [token.lemma_]
15
+ rows << [token.lemma]
16
16
  end
17
17
 
18
18
  table = Terminal::Table.new rows: rows, headings: headings