treat 0.2.5 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +3 -3
- data/README.md +33 -0
- data/files/INFO +1 -0
- data/lib/treat.rb +40 -105
- data/lib/treat/ai.rb +12 -0
- data/lib/treat/ai/classifiers/id3.rb +27 -0
- data/lib/treat/categories.rb +82 -35
- data/lib/treat/categorizable.rb +44 -0
- data/lib/treat/classification.rb +61 -0
- data/lib/treat/configurable.rb +115 -0
- data/lib/treat/data_set.rb +42 -0
- data/lib/treat/dependencies.rb +24 -0
- data/lib/treat/downloader.rb +87 -0
- data/lib/treat/entities.rb +68 -66
- data/lib/treat/entities/abilities.rb +10 -0
- data/lib/treat/entities/abilities/buildable.rb +327 -0
- data/lib/treat/entities/abilities/checkable.rb +31 -0
- data/lib/treat/entities/abilities/copyable.rb +45 -0
- data/lib/treat/entities/abilities/countable.rb +51 -0
- data/lib/treat/entities/abilities/debuggable.rb +83 -0
- data/lib/treat/entities/abilities/delegatable.rb +123 -0
- data/lib/treat/entities/abilities/doable.rb +62 -0
- data/lib/treat/entities/abilities/exportable.rb +11 -0
- data/lib/treat/entities/abilities/iterable.rb +115 -0
- data/lib/treat/entities/abilities/magical.rb +83 -0
- data/lib/treat/entities/abilities/registrable.rb +74 -0
- data/lib/treat/entities/abilities/stringable.rb +91 -0
- data/lib/treat/entities/entities.rb +104 -0
- data/lib/treat/entities/entity.rb +122 -245
- data/lib/treat/exception.rb +4 -4
- data/lib/treat/extractors.rb +77 -80
- data/lib/treat/extractors/keywords/tf_idf.rb +56 -22
- data/lib/treat/extractors/language/what_language.rb +50 -45
- data/lib/treat/extractors/name_tag/stanford.rb +55 -0
- data/lib/treat/extractors/tf_idf/native.rb +87 -0
- data/lib/treat/extractors/time/chronic.rb +55 -0
- data/lib/treat/extractors/time/nickel.rb +86 -62
- data/lib/treat/extractors/time/ruby.rb +53 -0
- data/lib/treat/extractors/topic_words/lda.rb +67 -58
- data/lib/treat/extractors/topics/reuters.rb +100 -87
- data/lib/treat/formatters.rb +39 -35
- data/lib/treat/formatters/readers/abw.rb +49 -29
- data/lib/treat/formatters/readers/autoselect.rb +37 -33
- data/lib/treat/formatters/readers/doc.rb +19 -13
- data/lib/treat/formatters/readers/html.rb +52 -30
- data/lib/treat/formatters/readers/image.rb +41 -40
- data/lib/treat/formatters/readers/odt.rb +59 -45
- data/lib/treat/formatters/readers/pdf.rb +28 -25
- data/lib/treat/formatters/readers/txt.rb +12 -15
- data/lib/treat/formatters/readers/xml.rb +73 -36
- data/lib/treat/formatters/serializers/xml.rb +80 -79
- data/lib/treat/formatters/serializers/yaml.rb +19 -18
- data/lib/treat/formatters/unserializers/autoselect.rb +12 -22
- data/lib/treat/formatters/unserializers/xml.rb +94 -99
- data/lib/treat/formatters/unserializers/yaml.rb +20 -19
- data/lib/treat/formatters/visualizers/dot.rb +132 -132
- data/lib/treat/formatters/visualizers/standoff.rb +52 -44
- data/lib/treat/formatters/visualizers/tree.rb +26 -29
- data/lib/treat/groupable.rb +153 -0
- data/lib/treat/helpers/decimal_point_escaper.rb +22 -0
- data/lib/treat/inflectors.rb +50 -45
- data/lib/treat/inflectors/cardinalizers/linguistics.rb +40 -0
- data/lib/treat/inflectors/conjugators/linguistics.rb +55 -0
- data/lib/treat/inflectors/declensors/active_support.rb +31 -0
- data/lib/treat/inflectors/declensors/english.rb +38 -0
- data/lib/treat/inflectors/declensors/english/inflect.rb +288 -0
- data/lib/treat/inflectors/declensors/linguistics.rb +49 -0
- data/lib/treat/inflectors/ordinalizers/linguistics.rb +17 -0
- data/lib/treat/inflectors/stemmers/porter.rb +160 -0
- data/lib/treat/inflectors/stemmers/porter_c.rb +24 -0
- data/lib/treat/inflectors/stemmers/uea.rb +28 -0
- data/lib/treat/installer.rb +308 -0
- data/lib/treat/kernel.rb +105 -27
- data/lib/treat/languages.rb +122 -88
- data/lib/treat/languages/arabic.rb +15 -15
- data/lib/treat/languages/chinese.rb +15 -15
- data/lib/treat/languages/dutch.rb +15 -15
- data/lib/treat/languages/english.rb +61 -62
- data/lib/treat/languages/french.rb +19 -19
- data/lib/treat/languages/german.rb +20 -20
- data/lib/treat/languages/greek.rb +15 -15
- data/lib/treat/languages/italian.rb +16 -16
- data/lib/treat/languages/polish.rb +15 -15
- data/lib/treat/languages/portuguese.rb +15 -15
- data/lib/treat/languages/russian.rb +15 -15
- data/lib/treat/languages/spanish.rb +16 -16
- data/lib/treat/languages/swedish.rb +16 -16
- data/lib/treat/lexicalizers.rb +34 -55
- data/lib/treat/lexicalizers/categorizers/from_tag.rb +54 -0
- data/lib/treat/lexicalizers/sensers/wordnet.rb +57 -0
- data/lib/treat/lexicalizers/sensers/wordnet/synset.rb +71 -0
- data/lib/treat/lexicalizers/taggers/brill.rb +70 -0
- data/lib/treat/lexicalizers/taggers/brill/patch.rb +61 -0
- data/lib/treat/lexicalizers/taggers/lingua.rb +90 -0
- data/lib/treat/lexicalizers/taggers/stanford.rb +97 -0
- data/lib/treat/linguistics.rb +9 -0
- data/lib/treat/linguistics/categories.rb +11 -0
- data/lib/treat/linguistics/tags.rb +422 -0
- data/lib/treat/loaders/linguistics.rb +30 -0
- data/lib/treat/loaders/stanford.rb +27 -0
- data/lib/treat/object.rb +1 -0
- data/lib/treat/processors.rb +37 -44
- data/lib/treat/processors/chunkers/autoselect.rb +16 -0
- data/lib/treat/processors/chunkers/html.rb +71 -0
- data/lib/treat/processors/chunkers/txt.rb +18 -24
- data/lib/treat/processors/parsers/enju.rb +253 -208
- data/lib/treat/processors/parsers/stanford.rb +130 -131
- data/lib/treat/processors/segmenters/punkt.rb +79 -45
- data/lib/treat/processors/segmenters/stanford.rb +46 -48
- data/lib/treat/processors/segmenters/tactful.rb +43 -36
- data/lib/treat/processors/tokenizers/perl.rb +124 -92
- data/lib/treat/processors/tokenizers/ptb.rb +81 -0
- data/lib/treat/processors/tokenizers/punkt.rb +48 -42
- data/lib/treat/processors/tokenizers/stanford.rb +39 -38
- data/lib/treat/processors/tokenizers/tactful.rb +64 -55
- data/lib/treat/proxies.rb +52 -35
- data/lib/treat/retrievers.rb +26 -16
- data/lib/treat/retrievers/indexers/ferret.rb +47 -26
- data/lib/treat/retrievers/searchers/ferret.rb +69 -50
- data/lib/treat/tree.rb +241 -183
- data/spec/collection.rb +123 -0
- data/spec/document.rb +93 -0
- data/spec/entity.rb +408 -0
- data/spec/languages.rb +25 -0
- data/spec/phrase.rb +146 -0
- data/spec/samples/mathematicians/archimedes.abw +34 -0
- data/spec/samples/mathematicians/euler.html +21 -0
- data/spec/samples/mathematicians/gauss.pdf +0 -0
- data/spec/samples/mathematicians/leibniz.txt +13 -0
- data/spec/samples/mathematicians/newton.doc +0 -0
- data/spec/sandbox.rb +5 -0
- data/spec/token.rb +109 -0
- data/spec/treat.rb +52 -0
- data/spec/tree.rb +117 -0
- data/spec/word.rb +110 -0
- data/spec/zone.rb +66 -0
- data/tmp/INFO +1 -1
- metadata +100 -201
- data/INSTALL +0 -1
- data/README +0 -3
- data/TODO +0 -28
- data/lib/economist/half_cocked_basel.txt +0 -16
- data/lib/economist/hungarys_troubles.txt +0 -46
- data/lib/economist/indias_slowdown.txt +0 -15
- data/lib/economist/merkozy_rides_again.txt +0 -24
- data/lib/economist/prada_is_not_walmart.txt +0 -9
- data/lib/economist/to_infinity_and_beyond.txt +0 -15
- data/lib/ferret/_11.cfs +0 -0
- data/lib/ferret/_14.cfs +0 -0
- data/lib/ferret/_p.cfs +0 -0
- data/lib/ferret/_s.cfs +0 -0
- data/lib/ferret/_v.cfs +0 -0
- data/lib/ferret/_y.cfs +0 -0
- data/lib/ferret/segments +0 -0
- data/lib/ferret/segments_15 +0 -0
- data/lib/treat/buildable.rb +0 -157
- data/lib/treat/category.rb +0 -33
- data/lib/treat/delegatable.rb +0 -116
- data/lib/treat/doable.rb +0 -45
- data/lib/treat/entities/collection.rb +0 -14
- data/lib/treat/entities/document.rb +0 -12
- data/lib/treat/entities/phrases.rb +0 -17
- data/lib/treat/entities/tokens.rb +0 -61
- data/lib/treat/entities/zones.rb +0 -41
- data/lib/treat/extractors/coreferences/stanford.rb +0 -69
- data/lib/treat/extractors/date/chronic.rb +0 -32
- data/lib/treat/extractors/date/ruby.rb +0 -25
- data/lib/treat/extractors/keywords/topics_tf_idf.rb +0 -48
- data/lib/treat/extractors/language/language_extractor.rb +0 -27
- data/lib/treat/extractors/named_entity_tag/stanford.rb +0 -53
- data/lib/treat/extractors/roles/naive.rb +0 -73
- data/lib/treat/extractors/statistics/frequency_in.rb +0 -16
- data/lib/treat/extractors/statistics/position_in.rb +0 -14
- data/lib/treat/extractors/statistics/tf_idf.rb +0 -104
- data/lib/treat/extractors/statistics/transition_matrix.rb +0 -105
- data/lib/treat/extractors/statistics/transition_probability.rb +0 -57
- data/lib/treat/extractors/topic_words/lda/data.dat +0 -46
- data/lib/treat/extractors/topic_words/lda/wiki.yml +0 -121
- data/lib/treat/extractors/topics/reuters/industry.xml +0 -2717
- data/lib/treat/extractors/topics/reuters/region.xml +0 -13586
- data/lib/treat/extractors/topics/reuters/topics.xml +0 -17977
- data/lib/treat/feature.rb +0 -58
- data/lib/treat/features.rb +0 -7
- data/lib/treat/formatters/visualizers/short_value.rb +0 -29
- data/lib/treat/formatters/visualizers/txt.rb +0 -45
- data/lib/treat/group.rb +0 -106
- data/lib/treat/helpers/linguistics_loader.rb +0 -18
- data/lib/treat/inflectors/cardinal_words/linguistics.rb +0 -42
- data/lib/treat/inflectors/conjugations/linguistics.rb +0 -36
- data/lib/treat/inflectors/declensions/english.rb +0 -319
- data/lib/treat/inflectors/declensions/linguistics.rb +0 -42
- data/lib/treat/inflectors/ordinal_words/linguistics.rb +0 -20
- data/lib/treat/inflectors/stem/porter.rb +0 -162
- data/lib/treat/inflectors/stem/porter_c.rb +0 -26
- data/lib/treat/inflectors/stem/uea.rb +0 -30
- data/lib/treat/install.rb +0 -59
- data/lib/treat/languages/tags.rb +0 -377
- data/lib/treat/lexicalizers/category/from_tag.rb +0 -49
- data/lib/treat/lexicalizers/linkages/naive.rb +0 -63
- data/lib/treat/lexicalizers/synsets/wordnet.rb +0 -76
- data/lib/treat/lexicalizers/tag/brill.rb +0 -91
- data/lib/treat/lexicalizers/tag/lingua.rb +0 -123
- data/lib/treat/lexicalizers/tag/stanford.rb +0 -70
- data/lib/treat/processors/segmenters/punkt/dutch.yaml +0 -9716
- data/lib/treat/processors/segmenters/punkt/english.yaml +0 -10340
- data/lib/treat/processors/segmenters/punkt/french.yaml +0 -43159
- data/lib/treat/processors/segmenters/punkt/german.yaml +0 -9572
- data/lib/treat/processors/segmenters/punkt/greek.yaml +0 -6050
- data/lib/treat/processors/segmenters/punkt/italian.yaml +0 -14748
- data/lib/treat/processors/segmenters/punkt/polish.yaml +0 -9751
- data/lib/treat/processors/segmenters/punkt/portuguese.yaml +0 -13662
- data/lib/treat/processors/segmenters/punkt/russian.yaml +0 -4237
- data/lib/treat/processors/segmenters/punkt/spanish.yaml +0 -24034
- data/lib/treat/processors/segmenters/punkt/swedish.yaml +0 -10001
- data/lib/treat/processors/tokenizers/macintyre.rb +0 -77
- data/lib/treat/processors/tokenizers/multilingual.rb +0 -30
- data/lib/treat/registrable.rb +0 -28
- data/lib/treat/sugar.rb +0 -50
- data/lib/treat/viewable.rb +0 -29
- data/lib/treat/visitable.rb +0 -28
- data/test/profile.rb +0 -2
- data/test/tc_entity.rb +0 -117
- data/test/tc_extractors.rb +0 -73
- data/test/tc_formatters.rb +0 -41
- data/test/tc_inflectors.rb +0 -34
- data/test/tc_lexicalizers.rb +0 -32
- data/test/tc_processors.rb +0 -50
- data/test/tc_resources.rb +0 -22
- data/test/tc_treat.rb +0 -60
- data/test/tc_tree.rb +0 -60
- data/test/tests.rb +0 -20
- data/test/texts.rb +0 -19
- data/test/texts/english/half_cocked_basel.txt +0 -16
- data/test/texts/english/hose_and_dry.doc +0 -0
- data/test/texts/english/hungarys_troubles.abw +0 -70
- data/test/texts/english/long.html +0 -24
- data/test/texts/english/long.txt +0 -22
- data/test/texts/english/medium.txt +0 -5
- data/test/texts/english/republican_nomination.pdf +0 -0
- data/test/texts/english/saving_the_euro.odt +0 -0
- data/test/texts/english/short.txt +0 -3
- data/test/texts/english/zero_sum.html +0 -111
@@ -0,0 +1,97 @@
|
|
1
|
+
# Wrapper for the Stanford POS tagger.
#
# Tags the tokens of an entity through the StanfordCoreNLP bindings,
# keeping one instantiated tagger per language in a class-level cache.
class Treat::Lexicalizers::Taggers::Stanford

  require 'treat/loaders/stanford'

  # Hold one tagger per language.
  @@taggers = {}

  # Hold the default options.
  DefaultOptions = {
    :tagger_model => nil
  }

  # Tag the word using one of the Stanford taggers.
  #
  # entity  - a token, or an entity whose tokens should be tagged; it is
  #           tokenized with the :stanford tokenizer first when it is
  #           not a token and has no children.
  # options - merged over DefaultOptions (see get_options).
  #
  # Sets :tag on every token. Returns the tag when the entity is itself
  # a token, 'S' for a sentence, 'P' for a phrase and nil otherwise.
  def self.tag(entity, options = {})

    isolated_token = entity.is_a?(Treat::Entities::Token)

    # Tokenize the sentence/phrase.
    if !entity.has_children? && !isolated_token
      entity.tokenize(:stanford, options)
    end

    # Handle options and initialize the tagger.
    lang = entity.language
    options = get_options(options, lang)
    tokens, list = get_token_list(entity)
    init_tagger(lang)

    # Do the tagging. The index is tracked by hand because the value
    # returned by #apply is only known to respond to #each.
    i = 0
    @@taggers[lang].apply(list).each do |tagged|
      tokens[i].set :tag, tagged.tag
      if isolated_token
        # A lone token carries its own tag set and is the whole result.
        tokens[i].set :tag_set, options[:tag_set]
        return tagged.tag
      end
      i += 1
    end

    # Handle tags for sentences and phrases.
    # NOTE(review): the tag set is hard-coded to :penn here while lone
    # tokens use options[:tag_set] - confirm this is intended.
    if entity.is_a?(Treat::Entities::Sentence) ||
       (entity.is_a?(Treat::Entities::Phrase) &&
        !entity.parent_sentence)
      entity.set :tag_set, :penn
    end

    if entity.is_a?(Treat::Entities::Sentence)
      return 'S'
    elsif entity.is_a?(Treat::Entities::Phrase)
      return 'P'
    end

  end

  # Initialize the tagger for a language.
  #
  # The model path is only resolved when no tagger is cached for the
  # language yet. Returns the tagger for the language.
  def self.init_tagger(lang)

    return @@taggers[lang] if @@taggers[lang]

    language = Treat::Languages.describe(lang)
    model = StanfordCoreNLP::Config::Models[:pos][language]
    model = Treat.models + 'stanford/' +
      StanfordCoreNLP::Config::ModelFolders[:pos] + model
    @@taggers[lang] = StanfordCoreNLP::MaxentTagger.new(model)

  end

  # Handle the options for the tagger.
  #
  # Merges the supplied options over DefaultOptions, registers a custom
  # 'pos.model' when :tagger_model is given, and stores the tag set
  # configured for the language under :tag_set. Returns the new hash.
  def self.get_options(options, lang)
    language = Treat::Languages.describe(lang)
    options = DefaultOptions.merge(options)
    if options[:tagger_model]
      ::StanfordCoreNLP.set_model('pos.model',
        options[:tagger_model])
    end
    options[:tag_set] =
      StanfordCoreNLP::Config::TagSets[language]
    options
  end

  # Retrieve a Java ArrayList object.
  #
  # Returns the entity's tokens (the entity itself when it is a token)
  # together with a list holding one StanfordCoreNLP::Word per token.
  def self.get_token_list(entity)
    list = StanfordCoreNLP::ArrayList.new
    if entity.is_a?(Treat::Entities::Token)
      tokens = [entity]
    else
      tokens = entity.tokens
    end
    tokens.each do |token|
      list.add(StanfordCoreNLP::Word.new(token.to_s))
    end
    return tokens, list
  end

end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
module Treat::Linguistics

  # Every category a word can be assigned, in declaration order.
  WordCategories = [
    :adjective,
    :adverb,
    :noun,
    :verb,
    :interjection,
    :clitic,
    :coverb,
    :conjunction,
    :determiner,
    :particle,
    :preposition,
    :pronoun,
    :number,
    :symbol,
    :punctuation,
    :complementizer
  ]

end
|
@@ -0,0 +1,422 @@
|
|
1
|
+
module Treat::Linguistics::Tags
|
2
|
+
|
3
|
+
# Column index of each tag set inside the tag arrays of AlignedWordTags
# and AlignedPhraseTags (e.g. tags[Penn] is the Penn Treebank tag).
ClawsC5, Brown, Penn, Negra, PennChinese, Simple = 0, 1, 2, 3, 4, 5
|
9
|
+
|
10
|
+
# Penn Treebank clause-level tags, as [tag, description] pairs.
PTBClauseTagDescription = [
  ['S',     'Simple declarative clause'],
  ['SBAR',  'Clause introduced by a (possibly empty) subordinating conjunction'],
  ['SBARQ', 'Direct question introduced by a wh-word or a wh-phrase'],
  ['SINV',  'Inverted declarative sentence'],
  ['SQ',    'Inverted yes/no question']
]
|
17
|
+
|
18
|
+
# Bracket characters and the Penn Treebank escape token used for each.
PTBEscapeCharacters = {
  '(' => '-LRB-', ')' => '-RRB-',
  '[' => '-LSB-', ']' => '-RSB-',
  '{' => '-LCB-', '}' => '-RCB-'
}
|
26
|
+
|
27
|
+
# Phrase-level tags as a flat list of (description, tags) pairs, meant
# to be walked with each_slice(2). Each tags array is indexed by the
# tag-set constants (ClawsC5, Brown, Penn); only the Penn column is
# filled in so far.
AlignedPhraseTags =
[
  'Adjective phrase', ['', '', 'ADJP'],
  'Adverb phrase', ['', '', 'ADVP'],
  'Conjunction phrase', ['', '', 'CONJP'],
  'Fragment', ['', '', 'FRAG'],
  'Interjection', ['', '', 'INTJ'],
  'List marker', ['', '', 'LST'],
  'Not a phrase', ['', '', 'NAC'],
  'Noun phrase', ['', '', 'NP'],
  'Head of NP', ['', '', 'NX'],
  'Prepositional phrase', ['', '', 'PP'],
  'Parenthetical', ['', '', 'PRN'],
  'Particle', ['', '', 'PRT'],
  'Quantifier phrase', ['', '', 'QP'],
  'Reduced relative clause', ['', '', 'RRC'],
  'Unlike coordinated phrase', ['', '', 'UCP'],
  'Verb phrase', ['', '', 'VP'],
  'Wh adjective phrase', ['', '', 'WHADJP'],
  # Was misspelled 'WHAVP', so the real Penn tag had no category.
  'Wh adverb phrase', ['', '', 'WHADVP'],
  'Wh noun phrase', ['', '', 'WHNP'],
  'Wh prepositional phrase', ['', '', 'WHPP'],
  'Unknown', ['', '', 'X'],
  'Phrase', ['', '', 'P'],
  'Sentence', ['', '', 'S'],
  'Phrase', ['', '', 'SBAR'] # Fix
]
|
54
|
+
|
55
|
+
# Enju syntactic categories, as [category, description] pairs.
EnjuCatDescription = [
  ['ADJ',  'Adjective'],
  ['ADV',  'Adverb'],
  ['CONJ', 'Coordination conjunction'],
  ['C',    'Complementizer'],
  ['D',    'Determiner'],
  ['N',    'Noun'],
  ['P',    'Preposition'],
  ['SC',   'Subordination conjunction'],
  ['V',    'Verb'],
  ['COOD', 'Part of coordination'],
  ['PN',   'Punctuation'],
  ['PRT',  'Particle'],
  ['S',    'Sentence']
]
|
71
|
+
|
72
|
+
# Treat word category for each Enju category; the three conjunction-like
# Enju categories (CONJ, COOD, SC) all map to :conjunction.
EnjuCatToCategory = {
  'ADJ'  => :adjective,
  'ADV'  => :adverb,
  'CONJ' => :conjunction,
  'COOD' => :conjunction,
  'C'    => :complementizer,
  'D'    => :determiner,
  'N'    => :noun,
  'P'    => :preposition,
  'PN'   => :punctuation,
  'SC'   => :conjunction,
  'V'    => :verb,
  'PRT'  => :particle
}
|
87
|
+
|
88
|
+
# Values of the xcat attribute in the Enju output specification, as
# [xcat, description] pairs.
EnjuXCatDescription = [
  ['COOD',  'Coordinated phrase/clause'],
  ['IMP',   'Imperative sentence'],
  ['INV',   'Subject-verb inversion'],
  ['Q',     'Interrogative sentence with subject-verb inversion'],
  ['REL',   'A relativizer included'],
  ['FREL',  'A free relative included'],
  ['TRACE', 'A trace included'],
  ['WH',    'A wh-question word included']
]
|
99
|
+
|
100
|
+
# Maps an Enju (cat, xcat) pair to a Penn Treebank phrase tag; every row
# is [cat, xcat, ptb_tag]. The final row is the fallback, tagged 'UK'.
EnjuCatXcatToPTB = [
  ['ADJP', '', 'ADJP'],
  ['ADJP', 'REL', 'WHADJP'],
  ['ADJP', 'FREL', 'WHADJP'],
  ['ADJP', 'WH', 'WHADJP'],
  ['ADVP', '', 'ADVP'],
  ['ADVP', 'REL', 'WHADVP'],
  ['ADVP', 'FREL', 'WHADVP'],
  ['ADVP', 'WH', 'WHADVP'],
  ['CONJP', '', 'CONJP'],
  ['CP', '', 'SBAR'],
  ['DP', '', 'NP'],
  ['NP', '', 'NP'],
  ['NX', 'NX', 'NAC'],
  # These three rows had no commas, so Ruby joined the adjacent string
  # literals into a single element (e.g. ['NPRELWHNP']).
  ['NP', 'REL', 'WHNP'],
  ['NP', 'FREL', 'WHNP'],
  ['NP', 'WH', 'WHNP'],
  ['PP', '', 'PP'],
  ['PP', 'REL', 'WHPP'],
  ['PP', 'WH', 'WHPP'],
  ['PRT', '', 'PRT'],
  ['S', '', 'S'],
  ['S', 'INV', 'SINV'],
  ['S', 'Q', 'SQ'],
  ['S', 'REL', 'SBAR'],
  ['S', 'FREL', 'SBAR'],
  ['S', 'WH', 'SBARQ'],
  ['SCP', '', 'SBAR'],
  # A second, identical VP row was dropped.
  ['VP', '', 'VP'],
  ['', '', 'UK']
]
|
132
|
+
|
133
|
+
# Aligned tags for the Claws C5, Brown and Penn tag sets.
|
134
|
+
# Adapted from Manning, Christopher and Schütze, Hinrich,
|
135
|
+
# 1999. Foundations of Statistical Natural Language
|
136
|
+
# Processing. MIT Press, p. 141-142;
|
137
|
+
# http://www.isocat.org/rest/dcs/376;
|
138
|
+
#
|
139
|
+
# JRS?
|
140
|
+
|
141
|
+
|
142
|
+
# Word category for three tags of the simple tag set.
SimpleWordTagToCategory = {
  'C'  => :complementizer,
  'PN' => :punctuation,
  'SC' => :conjunction
}
|
147
|
+
|
148
|
+
# Category symbol for each punctuation or symbol character. Opening and
# closing forms of a pair share one category, as do both quote marks.
PunctuationToCategory = {
  # Sentence punctuation and quotes.
  '.'  => :period,
  ','  => :comma,
  ';'  => :semicolon,
  ':'  => :colon,
  '!'  => :exclamation,
  '?'  => :interrogation,
  '"'  => :quote,
  "'"  => :quote,

  # Currency, arithmetic and typographic signs.
  '$'  => :dollar,
  '%'  => :percent,
  '#'  => :hash,
  '*'  => :asterisk,
  '&'  => :ampersand,
  '+'  => :plus,
  '-'  => :dash,

  # Separators and miscellaneous marks.
  '/'  => :slash,
  '\\' => :backslash,
  '^'  => :caret,
  '_'  => :underscore,
  '`'  => :tick,
  '|'  => :pipe,
  '~'  => :tilde,
  '@'  => :at,

  # Paired delimiters.
  '['  => :bracket,
  ']'  => :bracket,
  '{'  => :brace,
  '}'  => :brace,
  '('  => :parenthesis,
  ')'  => :parenthesis,

  # Angle brackets.
  '<'  => :tag,
  '>'  => :tag
}
|
185
|
+
|
186
|
+
# Word-level tags aligned across tag sets, stored as a flat list of
# (description, tags) pairs meant to be walked with each_slice(2).
#
# Each tags array is indexed by the tag-set constants: ClawsC5, Brown,
# Penn, Negra, PennChinese, Simple. An empty string or a missing
# trailing column means the tag set has no tag for that row.
#
# The first word of a description (up to the first comma or space),
# downcased, becomes the word category, so that word must be spelled
# correctly. Later rows win when a tag appears more than once for the
# same tag set, so row order matters.
AlignedWordTags = [

  'Adjective', ['AJ0', 'JJ', 'JJ', '', 'JJ', 'A'],
  'Adjective', ['AJ0', 'JJ', 'JJ', '', 'JJ', 'ADJ'],
  'Adjective, adverbial or predicative', ['', '', '', 'ADJD', '', 'ADJ'],
  'Adjective, attribute', ['', '', '', 'ADJA', 'VA', 'ADJ'],
  'Adjective, ordinal number', ['ORD', 'OD', 'JJ', '', 'OD', 'ADJ'],
  'Adjective, comparative', ['AJC', 'JJR', 'JJR', 'KOKOM', '', 'ADJ'],
  'Adjective, superlative', ['AJS', 'JJT', 'JJS', '', 'JJ', 'ADJ'],
  'Adjective, superlative, semantically', ['AJ0', 'JJS', 'JJ', '', '', 'ADJ'],
  'Adjective, cardinal number', ['CRD', 'CD', 'CD', 'CARD', 'CD', 'ADJ'],
  'Adjective, cardinal number, one', ['PNI', 'CD', 'CD', 'CARD', 'CD', 'ADJ'],

  'Adverb', ['AV0', 'RB', 'RB', 'ADV', 'AD', 'ADV'],
  'Adverb, negative', ['XX0', '*', 'RB', 'PTKNEG', '', 'ADV'],
  'Adverb, comparative', ['AV0', 'RBR', 'RBR', '', 'AD', 'ADV'],
  'Adverb, superlative', ['AV0', 'RBT', 'RBS', '', 'AD', 'ADV'],
  'Adverb, particle', ['AVP', 'RP', 'RP', '', '', 'ADV'],
  'Adverb, question', ['AVQ', 'WRB', 'WRB', '', 'AD', 'ADV'],
  # Was one column short, which put 'ADV' in the PennChinese slot.
  'Adverb, degree & question', ['AVQ', 'WQL', 'WRB', '', '', 'ADV'],
  'Adverb, degree', ['AV0', 'QL', 'RB', '', '', 'ADV'],
  'Adverb, degree, postposed', ['AV0', 'QLP', 'RB', '', '', 'ADV'],
  'Adverb, nominal', ['AV0', 'RN', 'RB', 'PROP', '', 'ADV'],
  # Had seven columns; 'PROP' now sits in the Negra slot like the row above.
  'Adverb, pronominal', ['', '', '', 'PROP', '', 'ADV'],

  'Conjunction, coordination', ['CJC', 'CC', 'CC', 'KON', 'CC', 'COOD'],
  'Conjunction, coordination, and', ['CJC', 'CC', 'CC', 'KON', 'CC', 'ET'],
  'Conjunction, subordination', ['CJS', 'CS', 'IN', 'KOUS', 'CS', 'CONJ'],
  'Conjunction, subordination with to and infinitive', ['', '', '', 'KOUI', '', ''],
  'Conjunction, complementizer, that', ['CJT', 'CS', 'IN', '', '', 'C'],

  'Determiner', ['DT0', 'DT', 'DT', '', 'DT', 'D'],
  'Determiner, pronoun', ['DT0', 'DTI', 'DT', '', '', 'D'],
  'Determiner, pronoun, plural', ['DT0', 'DTS', 'DT', '', '', 'D'],
  'Determiner, prequalifier', ['DT0', 'ABL', 'DT', '', '', 'D'],
  'Determiner, prequantifier', ['DT0', 'ABN', 'PDT', '', 'DT', 'D'],
  'Determiner, pronoun or double conjunction', ['DT0', 'ABX', 'PDT', '', '', 'D'],
  'Determiner, pronoun or double conjunction', ['DT0', 'DTX', 'DT', '', '', 'D'],
  'Determiner, article', ['AT0', 'AT', 'DT', 'ART', '', 'D'],
  'Determiner, postdeterminer', ['DT0', 'AP', 'DT', '', '', 'D'],
  'Determiner, possessive', ['DPS', 'PP$', 'PRP$', '', '', 'D'],
  'Determiner, possessive, second', ['DPS', 'PP$', 'PRPS', '', '', 'D'],
  'Determiner, question', ['DTQ', 'WDT', 'WDT', '', 'DT', 'D'],
  'Determiner, possessive & question', ['DTQ', 'WP$', 'WP$', '', '', 'D'],

  'Localizer', ['', '', '', '', 'LC'],

  'Measure word', ['', '', '', '', 'M'],

  'Noun, common', ['NN0', 'NN', 'NN', 'N', 'NN', 'NN'],
  'Noun, singular', ['NN1', 'NN', 'NN', 'NN', 'NN', 'N'],
  'Noun, plural', ['NN2', 'NNS', 'NNS', 'NN', 'NN', 'N'],
  'Noun, proper, singular', ['NP0', 'NP', 'NNP', 'NE', 'NR', 'N'],
  'Noun, proper, plural', ['NP0', 'NPS', 'NNPS', 'NE', 'NR', 'N'],
  'Noun, adverbial', ['NN0', 'NR', 'NN', 'NE', '', 'N'],
  # Was one column short, which put 'N' in the PennChinese slot.
  'Noun, adverbial, plural', ['NN2', 'NRS', 'NNS', '', '', 'N'],
  'Noun, temporal', ['', '', '', '', 'NT', 'N'],
  'Noun, verbal', ['', '', '', '', 'NN', 'N'],

  'Pronoun, nominal (indefinite)', ['PNI', 'PN', 'PRP', '', 'PN', 'CL'],
  'Pronoun, personal, subject', ['PNP', 'PPSS', 'PRP', 'PPER'],
  'Pronoun, personal, subject, 3SG', ['PNP', 'PPS', 'PRP', 'PPER'],
  'Pronoun, personal, object', ['PNP', 'PPO', 'PRP', 'PPER'],
  'Pronoun, reflexive', ['PNX', 'PPL', 'PRP', 'PRF'],
  'Pronoun, reflexive, plural', ['PNX', 'PPLS', 'PRP', 'PRF'],
  'Pronoun, question, subject', ['PNQ', 'WPS', 'WP', 'PWAV'],
  'Pronoun, question, subject', ['PNQ', 'WPS', 'WPS', 'PWAV'], # Hack
  'Pronoun, question, object', ['PNQ', 'WPO', 'WP', 'PWAV', 'PWAT'],
  'Pronoun, existential there', ['EX0', 'EX', 'EX'],
  'Pronoun, attributive demonstrative', ['', '', '', 'PDAT'],
  'Pronoun, attributive indefinite without determiner', ['', '', '', 'PIAT'],
  'Pronoun, attributive possessive', ['', '', '', 'PPOSAT', ''],
  'Pronoun, substituting demonstrative', ['', '', '', 'PDS'],
  'Pronoun, substituting possessive', ['', '', '', 'PPOSS', ''],
  'Pronoun, substituting indefinite', ['', '', '', 'PIS'],
  'Pronoun, attributive relative', ['', '', '', 'PRELAT', ''],
  'Pronoun, substituting relative', ['', '', '', 'PRELS', ''],
  'Pronoun, attributive interrogative', ['', '', '', 'PWAT'],
  'Pronoun, adverbial interrogative', ['', '', '', 'PWAV'],
  'Pronoun, substituting interrogative', ['', '', '', 'PWS'],

  'Verb, main, finite', ['', '', '', 'VVFIN', '', 'V'],
  'Verb, main, infinitive', ['', '', '', 'VVINF', '', 'V'],
  'Verb, main, imperative', ['', '', '', 'VVIMP', '', 'V'],
  'Verb, base present form (not infinitive)', ['VVB', 'VB', 'VBP', '', '', 'V'],
  'Verb, infinitive', ['VVI', 'VB', 'VB', 'V', '', 'V'],
  'Verb, past tense', ['VVD', 'VBD', 'VBD', '', '', 'V'],
  'Verb, present participle', ['VVG', 'VBG', 'VBG', 'VAPP', '', 'V'],
  'Verb, past/passive participle', ['VVN', 'VBN', 'VBN', 'VVPP', '', 'V'],
  'Verb, present, 3SG, -s form', ['VVZ', 'VBZ', 'VBZ', '', '', 'V'],
  'Verb, auxiliary', ['', '', '', 'VAFIN', '', 'V'],
  'Verb, imperative', ['', '', '', 'VAIMP', '', 'V'],
  'Verb, imperative infinitive', ['', '', '', 'VAINF', '', 'V'],
  'Verb, auxiliary do, base', ['VDB', 'DO', 'VBP', '', '', 'V'],
  'Verb, auxiliary do, infinitive', ['VDB', 'DO', 'VB', '', '', 'V'],
  'Verb, auxiliary do, past', ['VDD', 'DOD', 'VBD', '', '', 'V'],
  'Verb, auxiliary do, present participle', ['VDG', 'VBG', 'VBG', '', '', 'V'],
  'Verb, auxiliary do, past participle', ['VDN', 'VBN', 'VBN', '', '', 'V'],
  'Verb, auxiliary do, present 3SG', ['VDZ', 'DOZ', 'VBZ', '', '', 'V'],
  'Verb, auxiliary have, base', ['VHB', 'HV', 'VBP', 'VA', '', 'V'],
  'Verb, auxiliary have, infinitive', ['VHI', 'HV', 'VB', 'VAINF', '', 'V'],
  'Verb, auxiliary have, past', ['VHD', 'HVD', 'VBD', 'VA', '', 'V'],
  'Verb, auxiliary have, present participle', ['VHG', 'HVG', 'VBG', 'VA', '', 'V'],
  'Verb, auxiliary have, past participle', ['VHN', 'HVN', 'VBN', 'VAPP', '', 'V'],
  'Verb, auxiliary have, present 3SG', ['VHZ', 'HVZ', 'VBZ', 'VA', '', 'V'],
  'Verb, auxiliary be, infinitive', ['VBI', 'BE', 'VB', '', '', 'V'],
  'Verb, auxiliary be, past', ['VBD', 'BED', 'VBD', '', '', 'V'],
  'Verb, auxiliary be, past, 3SG', ['VBD', 'BEDZ', 'VBD', '', '', 'V'],
  'Verb, auxiliary be, present participle', ['VBG', 'BEG', 'VBG', '', '', 'V'],
  'Verb, auxiliary be, past participle', ['VBN', 'BEN', 'VBN', '', '', 'V'],
  'Verb, auxiliary be, present, 3SG', ['VBZ', 'BEZ', 'VBZ', '', '', 'V'],
  'Verb, auxiliary be, present, 1SG', ['VBB', 'BEM', 'VBP', '', '', 'V'],
  'Verb, auxiliary be, present', ['VBB', 'BER', 'VBP', '', '', 'V'],
  'Verb, modal', ['VM0', 'MD', 'MD', 'VMFIN', 'VV', 'V'],
  'Verb, modal', ['VM0', 'MD', 'MD', 'VMINF', 'VV', 'V'],
  'Verb, modal, finite', ['', '', '', '', 'VMFIN', 'V'],
  'Verb, modal, infinite', ['', '', '', '', 'VMINF', 'V'],
  'Verb, modal, past participle', ['', '', '', '', 'VMPP', 'V'],

  'Particle', ['', '', '', '', '', 'PRT'],
  'Particle, with adverb', ['', '', '', 'PTKA', '', 'PRT'],
  'Particle, answer', ['', '', '', 'PTKANT', '', 'PRT'],
  'Particle, negation', ['', '', '', 'PTKNEG', '', 'PRT'],
  'Particle, separated verb', ['', '', '', 'PTKVZ', '', 'PRT'],
  'Particle, to as infinitive marker', ['TO0', 'TO', 'TO', 'PTKZU', '', 'PRT'],

  'Preposition, comparative', ['', '', '', 'KOKOM', '', 'P'],
  'Preposition, to', ['PRP', 'IN', 'TO', '', '', 'P'],
  'Preposition', ['PRP', 'IN', 'IN', 'APPR', 'P', 'P'],
  'Preposition, with aritcle', ['', '', '', 'APPART', '', 'P'],
  'Preposition, of', ['PRF', 'IN', 'IN', '', '', 'P'],

  'Possessive', ['POS', '$', 'POS'],

  'Postposition', ['', '', '', 'APPO'],

  'Circumposition, right', ['', '', '', 'APZR', ''],

  'Interjection, onomatopoeia or other isolate', ['ITJ', 'UH', 'UH', 'ITJ', 'IJ'],

  'Onomatopoeia', ['', '', '', '', 'ON'],

  'Punctuation', ['', '', '', '', 'PU', 'PN'],
  'Punctuation, sentence ender', ['PUN', '.', '.', '', '', 'PN'],

  'Punctuation, semicolon', ['PUN', '.', '.', '', '', 'PN'],
  'Punctuation, colon or ellipsis', ['PUN', ':', ':'],
  'Punctuation, comma', ['PUN', ',', ',', '$,'],
  'Punctuation, dash', ['PUN', '-', '-'],
  'Punctuation, dollar sign', ['PUN', '', '$'],
  'Punctuation, left bracket', ['PUL', '(', '(', '$('],
  'Punctuation, right bracket', ['PUR', ')', ')'],
  'Punctuation, quotation mark, left', ['PUQ', '', '``'],
  'Punctuation, quotation mark, right', ['PUQ', '', '"'],

  'Punctuation, left bracket', ['PUL', '(', 'PPL'],
  'Punctuation, right bracket', ['PUR', ')', 'PPR'],
  'Punctuation, left square bracket', ['PUL', '(', 'LSB'],
  'Punctuation, right square bracket', ['PUR', ')', 'RSB'],
  'Punctuation, left curly bracket', ['PUL', '(', 'LCB'],
  'Punctuation, right curly bracket', ['PUR', ')', 'RCB'],

  'Unknown, foreign words (not in lexicon)', ['UNZ', '(FW-)', 'FW', '', 'FW'],

  'Symbol', ['', '', 'SYM', 'XY'],
  'Symbol, alphabetical', ['ZZ0', '', ''],
  'Symbol, list item', ['', '', 'LS'],

  # Not sure about these tags from the Chinese PTB.
  'Aspect marker', ['', '', '', '', 'AS'], # ?
  'Ba-construction', ['', '', '', '', 'BA'], # ?
  'In relative', ['', '', '', '', 'DEC'], # ?
  'Associative', ['', '', '', '', 'DER'], # ?
  'In V-de or V-de-R construct', ['', '', '', '', 'DER'], # ?
  'For words ? ', ['', '', '', '', 'ETC'], # ?
  'In long bei-construct', ['', '', '', '', 'LB'], # ?
  'In short bei-construct', ['', '', '', '', 'SB'], # ?
  'Sentence-nal particle', ['', '', '', '', 'SB'], # ?
  'Particle, other', ['', '', '', '', 'MSP'], # ?
  'Before VP', ['', '', '', '', 'DEV'], # ?
  'Verb, ? as main verb', ['', '', '', '', 'VE'], # ?
  'Verb, ????', ['', '', '', '', 'VC'] # ?
]
|
369
|
+
|
370
|
+
# Build the word tag lookup from AlignedWordTags:
#   tag => { tag_set => category }
# The category is the first word of the row's description, downcased
# and interned ('Adjective, comparative' gives :adjective).
wttc = {}

# Tag-set key paired with the column it occupies in each tags array.
tag_set_columns = [
  [:claws_5, ClawsC5],
  [:brown, Brown],
  [:penn, Penn],
  [:negra, Negra],
  [:penn_chinese, PennChinese],
  [:simple, Simple]
]

Treat::Linguistics::Tags::AlignedWordTags.each_slice(2) do |desc, tags|

  category = desc.gsub(',', ' ,').
  split(' ')[0].downcase.intern

  tag_set_columns.each do |tag_set, column|
    tag = tags[column]
    # A blank or missing column means the tag set has no tag for this
    # row; registering it would create '' and nil keys that map to
    # whichever row happened to come last.
    next if tag.nil? || tag.empty?
    wttc[tag] ||= {}
    wttc[tag][tag_set] = category
  end

end

# A hash converting word tags to word categories.
WordTagToCategory = wttc
|
395
|
+
|
396
|
+
# Build the phrase tag lookup from AlignedPhraseTags:
#   tag => { :penn => category }
# The category is the whole description, downcased, with spaces turned
# into underscores ('Noun phrase' gives :noun_phrase).
pttc = {}
Treat::Linguistics::Tags::AlignedPhraseTags.each_slice(2) do |desc, tags|
  penn_tag = tags[Penn]
  category = desc.gsub(',', ' ,').gsub(' ', '_').downcase.intern
  # Only the Penn column is registered; the Claws C5 and Brown columns
  # are not handled yet.
  pttc[penn_tag] ||= {}
  pttc[penn_tag][:penn] = category
end

# A hash converting phrase tags to phrase categories.
PhraseTagToCategory = pttc
|
409
|
+
|
410
|
+
# Placeholder: looks up +tag+ and +tag_set+ in both PhraseTagToCategory
# and WordTagToCategory but does nothing with the outcome, so it always
# returns nil.
# NOTE(review): the intended return value is not visible here, and
# tag_set is looked up as if it were a tag - confirm before implementing.
def self.describe(tag, tag_set)
  known = [PhraseTagToCategory, WordTagToCategory].all? do |table|
    table[tag] && table[tag_set]
  end
  return nil unless known
  nil
end
|
417
|
+
|
418
|
+
# Placeholder for converting +tag+ from the +from+ tag set to the +to+
# tag set. Not implemented: the arguments are ignored and nil is returned.
def self.convert(tag, from, to)
  nil
end
|
421
|
+
|
422
|
+
end
|