ruby-spacy 0.1.0 → 0.1.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +23 -0
  3. data/Gemfile.lock +3 -1
  4. data/README.md +123 -77
  5. data/examples/get_started/lexeme.rb +2 -2
  6. data/examples/get_started/linguistic_annotations.rb +1 -1
  7. data/examples/get_started/morphology.rb +45 -0
  8. data/examples/get_started/most_similar.rb +28 -27
  9. data/examples/get_started/named_entities.rb +1 -1
  10. data/examples/get_started/pos_tags_and_dependencies.rb +18 -18
  11. data/examples/get_started/similarity.rb +2 -2
  12. data/examples/japanese/ancestors.rb +9 -11
  13. data/examples/japanese/entity_annotations_and_labels.rb +1 -1
  14. data/examples/japanese/lemmatization.rb +1 -1
  15. data/examples/japanese/most_similar.rb +28 -27
  16. data/examples/japanese/named_entity_recognition.rb +1 -1
  17. data/examples/japanese/navigating_parse_tree.rb +18 -18
  18. data/examples/japanese/noun_chunks.rb +1 -1
  19. data/examples/japanese/pos_tagging.rb +20 -20
  20. data/examples/japanese/visualizing_dependencies.rb +2 -2
  21. data/examples/japanese/visualizing_named_entities.rb +1 -1
  22. data/examples/linguistic_features/ancestors.rb +13 -10
  23. data/examples/linguistic_features/entity_annotations_and_labels.rb +1 -1
  24. data/examples/linguistic_features/finding_a_verb_with_a_subject.rb +2 -2
  25. data/examples/linguistic_features/information_extraction.rb +2 -2
  26. data/examples/linguistic_features/iterating_children.rb +2 -2
  27. data/examples/linguistic_features/iterating_lefts_and_rights.rb +5 -5
  28. data/examples/linguistic_features/lemmatization.rb +1 -1
  29. data/examples/linguistic_features/named_entity_recognition.rb +1 -1
  30. data/examples/linguistic_features/navigating_parse_tree.rb +12 -12
  31. data/examples/linguistic_features/noun_chunks.rb +1 -1
  32. data/examples/linguistic_features/pos_tagging.rb +1 -1
  33. data/examples/linguistic_features/retokenize_1.rb +1 -1
  34. data/examples/linguistic_features/retokenize_2.rb +2 -2
  35. data/examples/linguistic_features/rule_based_morphology.rb +1 -1
  36. data/examples/linguistic_features/similarity.rb +2 -2
  37. data/examples/linguistic_features/similarity_between_lexemes.rb +18 -0
  38. data/examples/linguistic_features/similarity_between_spans.rb +2 -2
  39. data/examples/rule_based_matching/creating_spans_from_matches.rb +1 -1
  40. data/lib/ruby-spacy.rb +493 -300
  41. data/lib/ruby-spacy/version.rb +1 -1
  42. data/ruby-spacy.gemspec +1 -1
  43. metadata +6 -5
  44. data/examples/linguistic_features/morphology.rb +0 -17
  45. data/examples/linguistic_features/special_case_tokenization_rules.rb +0 -19
@@ -8,7 +8,7 @@ headings = ["text", "start_char", "end_char", "label"]
8
8
  rows = []
9
9
 
10
10
  doc.ents.each do |ent|
11
- rows << [ent.text, ent.start_char, ent.end_char, ent.label_]
11
+ rows << [ent.text, ent.start_char, ent.end_char, ent.label]
12
12
  end
13
13
 
14
14
  table = Terminal::Table.new rows: rows, headings: headings
@@ -2,30 +2,30 @@ require "ruby-spacy"
2
2
  require "terminal-table"
3
3
 
4
4
  nlp = Spacy::Language.new("en_core_web_sm")
5
- doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion")
5
+ doc = nlp.read("Apple is looking at buying U.K. startup for $1 billion.")
6
6
 
7
- headings = ["text", "lemma", "pos", "tag", "dep", "shape", "is_alpha", "is_stop"]
7
+ headings = ["text", "lemma", "pos", "tag", "dep"]
8
8
  rows = []
9
9
 
10
10
  doc.each do |token|
11
- rows << [token.text, token.lemma_, token.pos_, token.tag_, token.dep_, token.shape_, token.is_alpha, token.is_stop]
11
+ rows << [token.text, token.lemma, token.pos, token.tag, token.dep]
12
12
  end
13
13
 
14
14
  table = Terminal::Table.new rows: rows, headings: headings
15
15
  puts table
16
16
 
17
- # +---------+---------+-------+-----+----------+-------+----------+---------+
18
- # | text | lemma | pos | tag | dep | shape | is_alpha | is_stop |
19
- # +---------+---------+-------+-----+----------+-------+----------+---------+
20
- # | Apple | Apple | PROPN | NNP | nsubj | Xxxxx | true | false |
21
- # | is | be | AUX | VBZ | aux | xx | true | true |
22
- # | looking | look | VERB | VBG | ROOT | xxxx | true | false |
23
- # | at | at | ADP | IN | prep | xx | true | true |
24
- # | buying | buy | VERB | VBG | pcomp | xxxx | true | false |
25
- # | U.K. | U.K. | PROPN | NNP | dobj | X.X. | false | false |
26
- # | startup | startup | NOUN | NN | advcl | xxxx | true | false |
27
- # | for | for | ADP | IN | prep | xxx | true | true |
28
- # | $ | $ | SYM | $ | quantmod | $ | false | false |
29
- # | 1 | 1 | NUM | CD | compound | d | false | false |
30
- # | billion | billion | NUM | CD | pobj | xxxx | true | false |
31
- # +---------+---------+-------+-----+----------+-------+----------+---------+
17
+ # +---------+---------+-------+-----+----------+
18
+ # | text | lemma | pos | tag | dep |
19
+ # +---------+---------+-------+-----+----------+
20
+ # | Apple | Apple | PROPN | NNP | nsubj |
21
+ # | is | be | AUX | VBZ | aux |
22
+ # | looking | look | VERB | VBG | ROOT |
23
+ # | at | at | ADP | IN | prep |
24
+ # | buying | buy | VERB | VBG | pcomp |
25
+ # | U.K. | U.K. | PROPN | NNP | dobj |
26
+ # | startup | startup | NOUN | NN | advcl |
27
+ # | for | for | ADP | IN | prep |
28
+ # | $ | $ | SYM | $ | quantmod |
29
+ # | 1 | 1 | NUM | CD | compound |
30
+ # | billion | billion | NUM | CD | pobj |
31
+ # +---------+---------+-------+-----+----------+
@@ -4,8 +4,8 @@ nlp = Spacy::Language.new("en_core_web_lg")
4
4
  doc1 = nlp.read("I like salty fries and hamburgers.")
5
5
  doc2 = nlp.read("Fast food tastes very good.")
6
6
 
7
- puts "Doc 1: " + doc1
8
- puts "Doc 2: " + doc2
7
+ puts "Doc 1: " + doc1.text
8
+ puts "Doc 2: " + doc2.text
9
9
  puts "Similarity: #{doc1.similarity(doc2)}"
10
10
 
11
11
  # Doc 1: I like salty fries and hamburgers.
@@ -23,9 +23,7 @@ puts "The root of the sentence is: " + root.text
23
23
  puts "The subject of the sentence is: " + subject.text
24
24
 
25
25
  subject.subtree.each do |descendant|
26
- # need to convert "ancestors" object from a python generator to a ruby array
27
- ancestors = Spacy::generator_to_array(descendant.ancestors)
28
- rows << [descendant.text, descendant.dep_, descendant.n_lefts, descendant.n_rights, ancestors]
26
+ rows << [descendant.text, descendant.dep, descendant.n_lefts, descendant.n_rights, descendant.ancestors.map(&:text).join(", ")]
29
27
  end
30
28
 
31
29
  table = Terminal::Table.new rows: rows, headings: headings
@@ -34,11 +32,11 @@ puts table
34
32
  # The sentence: 私の父は寿司が好きだ。
35
33
  # The root of the sentence is: 好き
36
34
  # The subject of the sentence is: 父
37
- # +------+------------+---------+----------+----------------+
38
- # | text | dep | n_lefts | n_rights | ancestors |
39
- # +------+------------+---------+----------+----------------+
40
- # | 私 | nmod | 0 | 1 | [父, 好き] |
41
- # | の | case | 0 | 0 | [私, 父, 好き] |
42
- # | 父 | dislocated | 1 | 1 | [好き] |
43
- # | は | case | 0 | 0 | [父, 好き] |
44
- # +------+------------+---------+----------+----------------+
35
+ # +------+------------+---------+----------+--------------+
36
+ # | text | dep | n_lefts | n_rights | ancestors |
37
+ # +------+------------+---------+----------+--------------+
38
+ # | 私 | nmod | 0 | 1 | 父, 好き |
39
+ # | の | case | 0 | 0 | 私, 父, 好き |
40
+ # | 父 | dislocated | 1 | 1 | 好き |
41
+ # | は | case | 0 | 0 | 父, 好き |
42
+ # +------+------------+---------+----------+--------------+
@@ -10,7 +10,7 @@ headings = ["text", "ent_iob", "ent_iob_", "ent_type_"]
10
10
  rows = []
11
11
 
12
12
  doc.each do |ent|
13
- rows << [ent.text, ent.ent_iob, ent.ent_iob_, ent.ent_type_]
13
+ rows << [ent.text, ent.ent_iob, ent.ent_iob_, ent.ent_type]
14
14
  end
15
15
 
16
16
  table = Terminal::Table.new rows: rows, headings: headings
@@ -9,7 +9,7 @@ headings = ["text", "lemma"]
9
9
  rows = []
10
10
 
11
11
  doc.each do |token|
12
- rows << [token.text, token.lemma_]
12
+ rows << [token.text, token.lemma]
13
13
  end
14
14
 
15
15
  table = Terminal::Table.new rows: rows, headings: headings
@@ -9,38 +9,39 @@ france = nlp.get_lexeme("フランス")
9
9
 
10
10
  query = tokyo.vector - japan.vector + france.vector
11
11
 
12
- headings = ["key", "text", "score"]
12
+ headings = ["rank", "text", "score"]
13
13
  rows = []
14
14
 
15
15
  results = nlp.most_similar(query, 20)
16
- results.each do |lexeme|
17
- rows << [lexeme[:key], lexeme[:text], lexeme[:score],]
16
+ results.each_with_index do |lexeme, i|
17
+ index = (i + 1).to_s
18
+ rows << [index, lexeme.text, lexeme.score]
18
19
  end
19
20
 
20
21
  table = Terminal::Table.new rows: rows, headings: headings
21
22
  puts table
22
23
 
23
- # +----------------------+----------------+--------------------+
24
- # | key | text | score |
25
- # +----------------------+----------------+--------------------+
26
- # | 12090003238699662352 | パリ | 0.7376999855041504 |
27
- # | 18290786970454458111 | フランス | 0.7221999764442444 |
28
- # | 9360021637096476946 | 東京 | 0.6697999835014343 |
29
- # | 2437546359230213520 | ストラスブール | 0.631600022315979 |
30
- # | 13988178952745813186 | リヨン | 0.5939000248908997 |
31
- # | 10427160276079242800 | Paris | 0.574400007724762 |
32
- # | 5562396768860926997 | ベルギー | 0.5683000087738037 |
33
- # | 15029176915627965481 | ニース | 0.5679000020027161 |
34
- # | 9750625950625019690 | アルザス | 0.5644999742507935 |
35
- # | 2381640614569534741 | 南仏 | 0.5547999739646912 |
36
- # | 7486004458946554189 | ロンドン | 0.5525000095367432 |
37
- # | 7457654095417343716 | モンマルトル | 0.5453000068664551 |
38
- # | 14063777960246535660 | ブローニュ | 0.5338000059127808 |
39
- # | 3297880777656467136 | トゥールーズ | 0.5275999903678894 |
40
- # | 3059066136348671923 | バスティーユ | 0.5213000178337097 |
41
- # | 2423471048892368989 | フランス人 | 0.5194000005722046 |
42
- # | 15944886306236465675 | ロレーヌ | 0.5148000121116638 |
43
- # | 9592561648283566590 | モンパルナス | 0.513700008392334 |
44
- # | 6560045335275831141 | 渡仏 | 0.5131000280380249 |
45
- # | 8597467336360225096 | イタリア | 0.5127000212669373 |
46
- # +----------------------+----------------+--------------------+
24
+ # +------+----------------+--------------------+
25
+ # | rank | text | score |
26
+ # +------+----------------+--------------------+
27
+ # | 1 | パリ | 0.7376999855041504 |
28
+ # | 2 | フランス | 0.7221999764442444 |
29
+ # | 3 | 東京 | 0.6697999835014343 |
30
+ # | 4 | ストラスブール | 0.631600022315979 |
31
+ # | 5 | リヨン | 0.5939000248908997 |
32
+ # | 6 | Paris | 0.574400007724762 |
33
+ # | 7 | ベルギー | 0.5683000087738037 |
34
+ # | 8 | ニース | 0.5679000020027161 |
35
+ # | 9 | アルザス | 0.5644999742507935 |
36
+ # | 10 | 南仏 | 0.5547999739646912 |
37
+ # | 11 | ロンドン | 0.5525000095367432 |
38
+ # | 12 | モンマルトル | 0.5453000068664551 |
39
+ # | 13 | ブローニュ | 0.5338000059127808 |
40
+ # | 14 | トゥールーズ | 0.5275999903678894 |
41
+ # | 15 | バスティーユ | 0.5213000178337097 |
42
+ # | 16 | フランス人 | 0.5194000005722046 |
43
+ # | 17 | ロレーヌ | 0.5148000121116638 |
44
+ # | 18 | モンパルナス | 0.513700008392334 |
45
+ # | 19 | 渡仏 | 0.5131000280380249 |
46
+ # | 20 | イタリア | 0.5127000212669373 |
47
+ # +------+----------------+--------------------+
@@ -10,7 +10,7 @@ headings = ["text", "start", "end", "label"]
10
10
  rows = []
11
11
 
12
12
  doc.ents.each do |ent|
13
- rows << [ent.text, ent.start_char, ent.end_char, ent.label_]
13
+ rows << [ent.text, ent.start_char, ent.end_char, ent.label]
14
14
  end
15
15
 
16
16
  table = Terminal::Table.new rows: rows, headings: headings
@@ -9,26 +9,26 @@ headings = ["text", "dep", "head text", "head pos", "children"]
9
9
  rows = []
10
10
 
11
11
  doc.each do |token|
12
- rows << [token.text, token.dep_, token.head.text, token.head.pos_, token.children.to_s]
12
+ rows << [token.text, token.dep, token.head.text, token.head.pos, token.children.map(&:text).join(", ")]
13
13
  end
14
14
 
15
15
  table = Terminal::Table.new rows: rows, headings: headings
16
16
  puts table
17
17
 
18
- # +------+----------+-----------+----------+--------------------------+
19
- # | text | dep | head text | head pos | children |
20
- # +------+----------+-----------+----------+--------------------------+
21
- # | 自動 | compound | 車 | NOUN | [] |
22
- # | 運転 | compound | 車 | NOUN | [] |
23
- # | 車 | nsubj | 転嫁 | VERB | [自動, 運転, は] |
24
- # | は | case | 車 | NOUN | [] |
25
- # | 保険 | compound | 責任 | NOUN | [] |
26
- # | 責任 | obj | 転嫁 | VERB | [保険, を] |
27
- # | を | case | 責任 | NOUN | [] |
28
- # | 製造 | compound | 者 | NOUN | [] |
29
- # | 者 | obl | 転嫁 | VERB | [製造, に] |
30
- # | に | case | 者 | NOUN | [] |
31
- # | 転嫁 | ROOT | 転嫁 | VERB | [車, 責任, 者, する, 。] |
32
- # | する | aux | 転嫁 | VERB | [] |
33
- # | 。 | punct | 転嫁 | VERB | [] |
34
- # +------+----------+-----------+----------+--------------------------+
18
 + # +------+----------+-----------+----------+------------------------+
19
 + # | text | dep | head text | head pos | children |
20
 + # +------+----------+-----------+----------+------------------------+
21
 + # | 自動 | compound | 車 | 92 | |
22
 + # | 運転 | compound | 車 | 92 | |
23
 + # | 車 | nsubj | 転嫁 | 100 | 自動, 運転, は |
24
 + # | は | case | 車 | 92 | |
25
 + # | 保険 | compound | 責任 | 92 | |
26
 + # | 責任 | obj | 転嫁 | 100 | 保険, を |
27
 + # | を | case | 責任 | 92 | |
28
 + # | 製造 | compound | 者 | 92 | |
29
 + # | 者 | obl | 転嫁 | 100 | 製造, に |
30
 + # | に | case | 者 | 92 | |
31
 + # | 転嫁 | ROOT | 転嫁 | 100 | 車, 責任, 者, する, 。 |
32
 + # | する | aux | 転嫁 | 100 | |
33
 + # | 。 | punct | 転嫁 | 100 | |
34
 + # +------+----------+-----------+----------+------------------------+
@@ -9,7 +9,7 @@ headings = ["text", "root.text", "root.dep", "root.head.text"]
9
9
  rows = []
10
10
 
11
11
  doc.noun_chunks.each do |chunk|
12
- rows << [chunk.text, chunk.root.text, chunk.root.dep_, chunk.root.head.text]
12
+ rows << [chunk.text, chunk.root.text, chunk.root.dep, chunk.root.head.text]
13
13
  end
14
14
 
15
15
  table = Terminal::Table.new rows: rows, headings: headings
@@ -4,31 +4,31 @@ require "terminal-table"
4
4
  nlp = Spacy::Language.new("ja_core_news_lg")
5
5
  doc = nlp.read("任天堂は1983年にファミコンを14,800円で発売した。")
6
6
 
7
- headings = ["text", "lemma", "pos", "tag", "dep", "shape", "is_alpha", "is_stop"]
7
+ headings = ["text", "lemma", "pos", "tag", "dep"]
8
8
  rows = []
9
9
 
10
10
  doc.each do |token|
11
- rows << [token.text, token.lemma_, token.pos_, token.tag_, token.dep_, token.shape_, token.is_alpha, token.is_stop]
11
+ rows << [token.text, token.lemma, token.pos, token.tag, token.dep]
12
12
  end
13
13
 
14
14
  table = Terminal::Table.new rows: rows, headings: headings
15
15
  puts table
16
16
 
17
- # +------------+------------+-------+--------------------------+--------+--------+----------+---------+
18
- # | text | lemma | pos | tag | dep | shape | is_alpha | is_stop |
19
- # +------------+------------+-------+--------------------------+--------+--------+----------+---------+
20
- # | 任天堂 | 任天堂 | PROPN | 名詞-固有名詞-一般 | nsubj | xxx | true | false |
21
- # | は | は | ADP | 助詞-係助詞 | case | x | true | true |
22
- # | 1983 | 1983 | NUM | 名詞-数詞 | nummod | dddd | false | false |
23
- # | 年 | 年 | NOUN | 名詞-普通名詞-助数詞可能 | obl | x | true | false |
24
- # | に | に | ADP | 助詞-格助詞 | case | x | true | true |
25
- # | ファミコン | ファミコン | NOUN | 名詞-普通名詞-一般 | obj | xxxx | true | false |
26
- # | を | を | ADP | 助詞-格助詞 | case | x | true | true |
27
- # | 14,800 | 14,800 | NUM | 名詞-数詞 | fixed | dd,ddd | false | false |
28
- # | 円 | 円 | NOUN | 名詞-普通名詞-助数詞可能 | obl | x | true | false |
29
- # | で | で | ADP | 助詞-格助詞 | case | x | true | true |
30
- # | 発売 | 発売 | VERB | 名詞-普通名詞-サ変可能 | ROOT | xx | true | false |
31
- # | し | する | AUX | 動詞-非自立可能 | aux | x | true | true |
32
- # | た | た | AUX | 助動詞 | aux | x | true | true |
33
- # | 。 | 。 | PUNCT | 補助記号-句点 | punct | 。 | false | false |
34
- # +------------+------------+-------+--------------------------+--------+--------+----------+---------+
17
+ # +------------+------------+-------+--------------------------+--------+
18
+ # | text | lemma | pos | tag | dep |
19
+ # +------------+------------+-------+--------------------------+--------+
20
+ # | 任天堂 | 任天堂 | PROPN | 名詞-固有名詞-一般 | nsubj |
21
+ # | は | は | ADP | 助詞-係助詞 | case |
22
+ # | 1983 | 1983 | NUM | 名詞-数詞 | nummod |
23
+ # | 年 | 年 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
24
+ # | に | に | ADP | 助詞-格助詞 | case |
25
+ # | ファミコン | ファミコン | NOUN | 名詞-普通名詞-一般 | obj |
26
+ # | を | を | ADP | 助詞-格助詞 | case |
27
+ # | 14,800 | 14,800 | NUM | 名詞-数詞 | fixed |
28
+ # | 円 | 円 | NOUN | 名詞-普通名詞-助数詞可能 | obl |
29
+ # | で | で | ADP | 助詞-格助詞 | case |
30
+ # | 発売 | 発売 | VERB | 名詞-普通名詞-サ変可能 | ROOT |
31
+ # | し | する | AUX | 動詞-非自立可能 | aux |
32
+ # | た | た | AUX | 助動詞 | aux |
33
+ # | 。 | 。 | PUNCT | 補助記号-句点 | punct |
34
+ # +------------+------------+-------+--------------------------+--------+
@@ -6,8 +6,8 @@ nlp = Spacy::Language.new("ja_core_news_sm")
6
6
  sentence = "自動運転車は保険責任を製造者に転嫁する。"
7
7
  doc = nlp.read(sentence)
8
8
 
9
- dep_svg = doc.displacy('dep', false)
9
+ dep_svg = doc.displacy(style: 'dep', compact: false)
10
10
 
11
- File.open(File.join(File.dirname(__FILE__), "outputs/test_dep.svg"), "w") do |file|
11
+ File.open(File.join(File.dirname(__FILE__), "test_dep.svg"), "w") do |file|
12
12
  file.write(dep_svg)
13
13
  end
@@ -7,7 +7,7 @@ sentence ="セバスチアン・スランが2007年にグーグルで自動運
7
7
 
8
8
  doc = nlp.read(sentence)
9
9
 
10
- ent_html = doc.displacy('ent')
10
+ ent_html = doc.displacy(style: 'ent')
11
11
 
12
12
  File.open(File.join(File.dirname(__FILE__), "outputs/test_ent.html"), "w") do |file|
13
13
  file.write(ent_html)
@@ -24,18 +24,21 @@ puts "The subject of the sentence is: " + subject.text
24
24
  subject.subtree.each do |descendant|
25
25
  # need to convert "ancestors" object from a python generator to a ruby array
26
26
  ancestors = Spacy::generator_to_array(descendant.ancestors)
27
- rows << [descendant.text, descendant.dep_, descendant.n_lefts, descendant.n_rights, ancestors]
27
+ rows << [descendant.text, descendant.dep, descendant.n_lefts, descendant.n_rights, ancestors.map(&:text).join(", ")]
28
28
  end
29
29
 
30
30
  table = Terminal::Table.new rows: rows, headings: headings
31
31
  print table
32
32
 
33
- # +----------+----------+---------+----------+------------------------------------+
34
- # | text | dep | n_lefts | n_rights | ancestors |
35
- # +----------+----------+---------+----------+------------------------------------+
36
- # | Credit | nmod | 0 | 2 | [holders, submit] |
37
- # | and | cc | 0 | 0 | [Credit, holders, submit] |
38
- # | mortgage | compound | 0 | 0 | [account, Credit, holders, submit] |
39
- # | account | conj | 1 | 0 | [Credit, holders, submit] |
40
- # | holders | nsubj | 1 | 0 | [submit] |
41
- # +----------+----------+---------+----------+------------------------------------+
33
+ # The sentence: Credit and mortgage account holders must submit their requests
34
+ # The root of the sentence is: submit
35
+ # The subject of the sentence is: holders
36
+ # +----------+----------+---------+----------+----------------------------------+
37
+ # | text | dep | n_lefts | n_rights | ancestors |
38
+ # +----------+----------+---------+----------+----------------------------------+
39
+ # | Credit | nmod | 0 | 2 | holders, submit |
40
+ # | and | cc | 0 | 0 | Credit, holders, submit |
41
+ # | mortgage | compound | 0 | 0 | account, Credit, holders, submit |
42
+ # | account | conj | 1 | 0 | Credit, holders, submit |
43
+ # | holders | nsubj | 1 | 0 | submit |
44
+ # +----------+----------+---------+----------+----------------------------------+
@@ -10,7 +10,7 @@ headings = ["text", "ent_iob", "ent_iob_", "ent_type_"]
10
10
  rows = []
11
11
 
12
12
  doc.each do |ent|
13
- rows << [ent.text, ent.ent_iob, ent.ent_iob_, ent.ent_type_]
13
+ rows << [ent.text, ent.ent_iob, ent.ent_iob_, ent.ent_type]
14
14
  end
15
15
 
16
16
  table = Terminal::Table.new rows: rows, headings: headings
@@ -10,11 +10,11 @@ results = []
10
10
 
11
11
  doc.each do |token|
12
12
  if token.dep_ == "nsubj" && token.head.pos_ == "VERB"
13
- results << token.head
13
+ results << token.head.text
14
14
  end
15
15
  end
16
16
 
17
17
  puts results.to_s
18
18
 
19
- # [shift]
19
+ # ["shift"]
20
20
 
@@ -19,11 +19,11 @@ texts.each do |text|
19
19
  doc.each do |token|
20
20
  if token.ent_type_ == "MONEY"
21
21
  if ["attr", "dobj"].index token.dep_
22
- subj = Spacy.generator_to_array(token.head.lefts).select{|t| t.dep_ == "nsubj"}
22
+ subj = Spacy.generator_to_array(token.head.lefts).select{|t| t.dep == "nsubj"}
23
23
  if !subj.empty?
24
24
  puts(subj[0].text + " --> " + token.text)
25
25
  end
26
- elsif token.dep_ == "pobj" and token.head.dep_ == "prep"
26
+ elsif token.dep_ == "pobj" and token.head.dep == "prep"
27
27
  puts token.head.head.text + " --> " + token.text
28
28
  end
29
29
  end
@@ -12,7 +12,7 @@ doc.each do |token|
12
12
  if token.pos_ == "VERB"
13
13
  token.children.each do |child|
14
14
  if child.dep_ == "nsubj"
15
- results << child.head
15
+ results << child.head.text
16
16
  end
17
17
  end
18
18
  end
@@ -20,5 +20,5 @@ end
20
20
 
21
21
  puts results.to_s
22
22
 
23
- # [shift]
23
+ # ["shift"]
24
24
 
@@ -5,16 +5,16 @@ nlp = Spacy::Language.new("en_core_web_sm")
5
5
 
6
6
  doc = nlp.read("bright red apples on the tree")
7
7
 
8
- puts "Text: " + doc
8
+ puts "Text: " + doc.text
9
9
 
10
- puts "Words to the left of 'apple': " + Spacy.generator_to_array(doc[2].lefts).to_s
11
- puts "Words to the right of 'apple': " + Spacy.generator_to_array(doc[2].rights).to_s
10
+ puts "Words to the left of 'apple': " + doc[2].lefts.map(&:text).join(", ")
11
+ puts "Words to the right of 'apple': " + doc[2].rights.map(&:text).join(", ")
12
12
 
13
13
  puts "Num of the words to the left of 'apple': " + doc[2].n_lefts.to_s
14
14
  puts "Num of the words to the right of 'apple': " + doc[2].n_rights.to_s
15
15
 
16
16
  # Text: bright red apples on the tree
17
- # Words to the left of 'apple': [bright, red]
18
- # Words to the right of 'apple': [on]
17
+ # Words to the left of 'apple': bright, red
18
+ # Words to the right of 'apple': on
19
19
  # Num of the words to the left of 'apple': 2
20
20
  # Num of the words to the right of 'apple': 1
@@ -12,7 +12,7 @@ headings = ["lemma"]
12
12
  rows = []
13
13
 
14
14
  doc.each do |token|
15
- rows << [token.lemma_]
15
+ rows << [token.lemma]
16
16
  end
17
17
 
18
18
  table = Terminal::Table.new rows: rows, headings: headings