nlp 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/liwc_analyzer.rb CHANGED
@@ -1,97 +1,74 @@
1
1
  module NLP
2
- class LIWCAnalyzer < Analyzer
3
-
4
- def initialize( category_file, restore = true )
5
- state_file = File.expand_path(Analyzer::CACHE_DIR+'.liwc')
6
- if restore
7
- @dictionary = Dictionary.restore(state_file)
8
- else
9
- @dictionary = Dictionary.new
10
- @dictionary.load_categories( category_file, :rid => false )
11
- @dictionary.store(state_file)
12
- end
13
-
2
+ class LIWCAnalyzer < Analyzer
3
+
4
+ def initialize(dicts)
5
+ @dictionary = Dictionary.new(:liwc)
14
6
  end
15
7
 
16
8
 
17
-
18
- def analyze(scanner)
19
-
20
- results = {
21
- :word_count => 0,
22
- :word_total => 0,
23
- :scores => Hash.new { 0 },
24
- :words => [],
25
- :cwords => Hash.new { nil },
26
- :long_words => [],
27
- :zaimki => [],
28
- :zaimki1 => [],
29
- :zaimki2 => [],
30
- :zaimki3 => [],
31
- :przyimki => [],
32
- :numbers => [],
33
- :emotion => [],
34
- :social => [],
35
- :personal => [],
36
- :posemotion => [],
37
- :negemotion => [],
38
- :wulgar => [],
39
- :cognitive => []
40
-
41
- }
42
-
43
- while token = scanner.current
44
- word = token.lemat
45
-
46
- categories = @dictionary.find( word.gsub( /[^\w-]/, "" ) )
47
- unless categories.nil?
48
- categories.each do |category|
49
- puts "Znalazłem słowo #{word} : #{category} root: #{category.root}"
50
- token.category = category
51
- results[:scores][category] = results[:scores][category] + 1
52
-
53
-
54
- if results[:cwords][category.name].nil?
55
- results[:cwords][category.name] = []
56
- end
57
- results[:cwords][category.name].push token.orth
58
-
59
-
60
- results[:emotion].push token.orth if token.emotion?
61
- results[:social].push token.orth if token.social?
62
- results[:personal].push token.orth if token.personal?
63
- results[:wulgar].push token.orth if token.bad_word?
64
- results[:cognitive].push token.orth if token.cognitive?
65
-
66
- results[:posemotion].push token.orth if token.positive_emotion?
67
- results[:negemotion].push token.orth if token.negative_emotion?
68
- results[:word_count] += 1
69
- results[:words].push word
70
- end
71
- end
72
-
73
- #words longer than 9
74
- results[:long_words].push word if word.jlength > 9
75
- if token.zaimek?
76
- results[:zaimki].push word
77
-
78
- results[:zaimki1].push token.orth if word === 'ja' or word === 'my'
79
- results[:zaimki2].push token.orth if word === 'ty' or word === 'wy'
80
- results[:zaimki3].push token.orth if word === 'on'
81
- end
82
-
83
- results[:przyimki].push word if token.przyimek?
84
- results[:numbers].push token.orth if token.number? or token.liczebnik?
85
-
86
-
87
- results[:word_total] += 1
88
- scanner.next(:alphanum)
89
- end
90
- results
91
-
92
- end
93
-
94
-
95
- end
9
+ def analyze(scanner)
10
+
11
+ results = Statistic.new
12
+ results.hash = {
13
+ :long_words => [],
14
+ :zaimki => [],
15
+ :zaimki1 => [],
16
+ :zaimki2 => [],
17
+ :zaimki3 => [],
18
+ :przyimki => [],
19
+ :numbers => [],
20
+ :emotion => [],
21
+ :social => [],
22
+ :personal => [],
23
+ :posemotion => [],
24
+ :negemotion => [],
25
+ :wulgar => [],
26
+ :cognitive => []
27
+ }
28
+
29
+ while token = scanner.current
30
+ word = token.lemat
31
+
32
+ categories = @dictionary.find(word.gsub( /[^\w-]/, "" ))
33
+ unless categories.nil?
34
+ categories.each do |category|
35
+ puts "Znalazłem słowo #{word} : #{category} root: #{category.root}"
36
+ token.category = category
37
+ results.add(word,category)
38
+
39
+
40
+ results[:emotion].push token.orth if token.emotion?
41
+ results[:social].push token.orth if token.social?
42
+ results[:personal].push token.orth if token.personal?
43
+ results[:wulgar].push token.orth if token.bad_word?
44
+ results[:cognitive].push token.orth if token.cognitive?
45
+
46
+ results[:posemotion].push token.orth if token.positive_emotion?
47
+ results[:negemotion].push token.orth if token.negative_emotion?
48
+ end
49
+ end
50
+
51
+ #words longer than 10
52
+ results[:long_words].push word if word.jlength > 10
53
+ if token.zaimek?
54
+ results[:zaimki].push word
55
+
56
+ results[:zaimki1].push token.orth if word === 'ja' or word === 'my'
57
+ results[:zaimki2].push token.orth if word === 'ty' or word === 'wy'
58
+ results[:zaimki3].push token.orth if word === 'on'
59
+ end
60
+
61
+ results[:przyimki].push word if token.przyimek?
62
+ results[:numbers].push token.orth if token.number? or token.liczebnik?
63
+
64
+
65
+ results.total_words += 1
66
+ scanner.next(:alphanum)
67
+ end
68
+ results
69
+
70
+ end
71
+
72
+ end
96
73
 
97
74
  end
data/lib/liwc_category.rb CHANGED
@@ -1,62 +1,61 @@
1
1
  module NLP
2
2
 
3
- class LIWCCategory < Category
3
+ class LIWCCategory < Category
4
4
 
5
- #primary categories
6
-
7
- def linguistic?
8
- root == :PIERWOTNE
9
- end
10
-
11
- def psychological?
12
- root == :PROCESY_PSYCHOLOGICZNE
13
- end
5
+ #primary categories
14
6
 
15
-
16
- def relative?
17
- root === :RELATYWNOSC
18
- end
19
-
20
- def personal?
21
- root == :OSOBISTE
22
- end
7
+ def linguistic?
8
+ root == :PIERWOTNE
9
+ end
10
+
11
+ def psychological?
12
+ root == :PROCESY_PSYCHOLOGICZNE
13
+ end
14
+
15
+
16
+ def relative?
17
+ root === :RELATYWNOSC
18
+ end
19
+
20
+ def personal?
21
+ root == :OSOBISTE
22
+ end
23
23
 
24
- #second categories
25
-
26
- def emotion?
27
- path.include? 'EMOCJE'
24
+ #second categories
28
25
 
29
- end
26
+ def emotion?
27
+ path.include? 'EMOCJE'
30
28
 
31
- def positive_emotion?
32
- path.include? 'POZYTYWNE_EMOCJE'
33
-
34
- end
29
+ end
35
30
 
36
- def negative_emotion?
37
- path.include? 'NEGATYWNE_EMOCJE'
31
+ def positive_emotion?
32
+ path.include? 'POZYTYWNE_EMOCJE'
38
33
 
39
- end
34
+ end
40
35
 
41
- def cognitive?
42
- path.include? 'KOGNITYWNE_PROCESY'
36
+ def negative_emotion?
37
+ path.include? 'NEGATYWNE_EMOCJE'
43
38
 
44
- end
39
+ end
45
40
 
46
- def sense?
47
- path.include? 'ZMYSLY'
48
- end
41
+ def cognitive?
42
+ path.include? 'KOGNITYWNE_PROCESY'
49
43
 
50
- def social?
51
- path.include? 'SOCIAL'
44
+ end
52
45
 
53
- end
46
+ def sense?
47
+ path.include? 'ZMYSLY'
48
+ end
54
49
 
55
- def bad_word?
56
- path.include? 'WULGAR'
57
- end
50
+ def social?
51
+ path.include? 'SOCIAL'
58
52
 
53
+ end
59
54
 
60
-
55
+ def bad_word?
56
+ path.include? 'WULGAR'
61
57
  end
58
+
59
+
60
+ end
62
61
  end
data/lib/meaningable.rb CHANGED
@@ -1,76 +1,69 @@
1
1
  module Meaningable
2
2
 
3
-
3
+ #LIWC
4
+ #primary categories
4
5
 
5
- #LIWC
6
- #primary categories
7
-
8
- def linguistic?
9
- category.root == :PIERWOTNE
10
- end
11
-
12
- def psychological?
13
- category.root == :PROCESY_PSYCHOLOGICZNE
14
- end
6
+ def linguistic?
7
+ category.root == :PIERWOTNE
8
+ end
15
9
 
16
-
17
- def relative?
18
- category.root === :RELATYWNOSC
19
- end
20
-
21
- def personal?
22
- category.root == :OSOBISTE
23
- end
10
+ def psychological?
11
+ category.root == :PROCESY_PSYCHOLOGICZNE
12
+ end
24
13
 
25
- #second categories
26
-
27
- def emotion?
28
- category.path.include? 'EMOCJE'
29
14
 
30
- end
15
+ def relative?
16
+ category.root === :RELATYWNOSC
17
+ end
31
18
 
32
- def positive_emotion?
33
- category.path.include? 'POZYTYWNE_EMOCJE'
34
-
35
- end
19
+ def personal?
20
+ category.root == :OSOBISTE
21
+ end
36
22
 
37
- def negative_emotion?
38
- category.path.include? 'NEGATYWNE_EMOCJE'
23
+ #second categories
39
24
 
40
- end
25
+ def emotion?
26
+ category.path.include? 'EMOCJE'
41
27
 
42
- def cognitive?
43
- category.path.include? 'KOGNITYWNE_PROCESY'
28
+ end
44
29
 
45
- end
30
+ def positive_emotion?
31
+ category.path.include? 'POZYTYWNE_EMOCJE'
46
32
 
47
- def sense?
48
- category.path.include? 'ZMYSLY'
49
- end
33
+ end
50
34
 
51
- def social?
52
- category.path.include? 'SOCIAL'
35
+ def negative_emotion?
36
+ category.path.include? 'NEGATYWNE_EMOCJE'
53
37
 
54
- end
38
+ end
55
39
 
56
- def bad_word?
57
- category.path.include? 'WULGAR'
58
- end
40
+ def cognitive?
41
+ category.path.include? 'KOGNITYWNE_PROCESY'
59
42
 
43
+ end
60
44
 
45
+ def sense?
46
+ category.path.include? 'ZMYSLY'
47
+ end
61
48
 
49
+ def social?
50
+ category.path.include? 'SOCIAL'
62
51
 
52
+ end
63
53
 
64
- #SEMANTIC
65
- def synonym?(other)
66
-
67
- end
54
+ def bad_word?
55
+ category.path.include? 'WULGAR'
56
+ end
68
57
 
69
- def synonyms
70
58
 
71
- end
72
-
59
+ #SEMANTIC
60
+ def synonym?(other)
61
+
62
+ end
63
+
64
+ def synonyms
65
+
66
+ end
73
67
 
74
68
 
75
-
76
69
  end
data/lib/nlp.rb CHANGED
@@ -1,4 +1,14 @@
1
+ module NLP
2
+
3
+ TAKIPI_XML_FILE = "/tmp/output.xml"
4
+ DICTIONARY_CACHE_DIR = "~/"
5
+
6
+
7
+ end
8
+
9
+
1
10
  require 'stdlib/ext/array'
2
11
  require 'stdlib/ext/string.rb'
3
12
  require 'analyzer'
4
13
 
14
+
data/lib/rid_analyzer.rb CHANGED
@@ -1,74 +1,10 @@
1
1
  module NLP
2
- class RIDAnalyzer < NLP::Analyzer
3
-
4
-
5
- def initialize( category_file, restore = true )
6
- state_file = File.expand_path(Analyzer::CACHE_DIR+'.rid')
7
- if restore
8
- @dictionary = Dictionary.restore(state_file)
9
- else
10
- @dictionary = Dictionary.new
11
- @dictionary.load_categories( category_file, :rid => true )
12
- @dictionary.store(state_file)
13
- end
14
-
15
- end
16
-
17
-
18
- def analyze(scanner)
19
- results = {
20
- :word_count => 0,
21
- :word_total => 0,
22
- :scores => Hash.new { 0 },
23
- :words => [],
24
- :cwords => Hash.new { nil }
25
- }
26
-
27
- while token = scanner.current
28
- word = token.lemat
29
-
30
- categories = @dictionary.find( word.gsub( /[^\w-]/, "" ) )
31
- unless categories.nil?
32
- categories.each do |category|
33
- puts "Znalazłem słowo #{word} : #{category} root: #{category.root}"
34
- results[:scores][category] = results[:scores][category] + 1
35
- category = category.name
36
- if results[:cwords][category].nil?
37
- results[:cwords][category] = []
38
- end
39
- results[:cwords][category].push word
40
- results[:word_count] += 1
41
- results[:words].push word
42
- end
43
-
44
-
45
- end
46
-
47
- results[:word_total] += 1
48
- scanner.next(:word)
49
- end
50
-
51
- results[:sorted_scores] = results[:scores].to_a.sort_by { |result| -result[1] }
52
- p primary_sum = results[:sorted_scores].select { |result| result[0].primary? }.inject( 0 ) { |count,result| count + result[1] }
53
- p secondary_sum = results[:sorted_scores].select { |result| result[0].secondary? }.inject( 0 ) { |count,result| count + result[1] }
54
- p emotion_sum = results[:sorted_scores].select { |result| result[0].emotions? }.inject( 0 ) { |count,result| count + result[1] }
55
-
56
-
57
-
58
- results[:classes] = {
59
- :primary => Float(primary_sum) / results[:word_count],
60
- :secondary => Float(secondary_sum) / results[:word_count],
61
- :emotions => Float(emotion_sum) / results[:word_count]
62
- }
63
-
64
- results
65
- end
66
-
67
-
68
-
69
2
 
3
+ class RIDAnalyzer < Analyzer
70
4
 
5
+ def initialize
6
+ @dictionary = Dictionary.new(:rid)
71
7
  end
72
-
73
-
8
+
9
+ end
74
10
  end