nlp 0.2.5 → 0.2.6

Sign up to get free protection for your applications and to get access to all the features.
data/lib/liwc_analyzer.rb CHANGED
@@ -1,97 +1,74 @@
1
1
  module NLP
2
- class LIWCAnalyzer < Analyzer
3
-
4
- def initialize( category_file, restore = true )
5
- state_file = File.expand_path(Analyzer::CACHE_DIR+'.liwc')
6
- if restore
7
- @dictionary = Dictionary.restore(state_file)
8
- else
9
- @dictionary = Dictionary.new
10
- @dictionary.load_categories( category_file, :rid => false )
11
- @dictionary.store(state_file)
12
- end
13
-
2
+ class LIWCAnalyzer < Analyzer
3
+
4
+ def initialize(dicts)
5
+ @dictionary = Dictionary.new(:liwc)
14
6
  end
15
7
 
16
8
 
17
-
18
- def analyze(scanner)
19
-
20
- results = {
21
- :word_count => 0,
22
- :word_total => 0,
23
- :scores => Hash.new { 0 },
24
- :words => [],
25
- :cwords => Hash.new { nil },
26
- :long_words => [],
27
- :zaimki => [],
28
- :zaimki1 => [],
29
- :zaimki2 => [],
30
- :zaimki3 => [],
31
- :przyimki => [],
32
- :numbers => [],
33
- :emotion => [],
34
- :social => [],
35
- :personal => [],
36
- :posemotion => [],
37
- :negemotion => [],
38
- :wulgar => [],
39
- :cognitive => []
40
-
41
- }
42
-
43
- while token = scanner.current
44
- word = token.lemat
45
-
46
- categories = @dictionary.find( word.gsub( /[^\w-]/, "" ) )
47
- unless categories.nil?
48
- categories.each do |category|
49
- puts "Znalazłem słowo #{word} : #{category} root: #{category.root}"
50
- token.category = category
51
- results[:scores][category] = results[:scores][category] + 1
52
-
53
-
54
- if results[:cwords][category.name].nil?
55
- results[:cwords][category.name] = []
56
- end
57
- results[:cwords][category.name].push token.orth
58
-
59
-
60
- results[:emotion].push token.orth if token.emotion?
61
- results[:social].push token.orth if token.social?
62
- results[:personal].push token.orth if token.personal?
63
- results[:wulgar].push token.orth if token.bad_word?
64
- results[:cognitive].push token.orth if token.cognitive?
65
-
66
- results[:posemotion].push token.orth if token.positive_emotion?
67
- results[:negemotion].push token.orth if token.negative_emotion?
68
- results[:word_count] += 1
69
- results[:words].push word
70
- end
71
- end
72
-
73
- #words longer than 9
74
- results[:long_words].push word if word.jlength > 9
75
- if token.zaimek?
76
- results[:zaimki].push word
77
-
78
- results[:zaimki1].push token.orth if word === 'ja' or word === 'my'
79
- results[:zaimki2].push token.orth if word === 'ty' or word === 'wy'
80
- results[:zaimki3].push token.orth if word === 'on'
81
- end
82
-
83
- results[:przyimki].push word if token.przyimek?
84
- results[:numbers].push token.orth if token.number? or token.liczebnik?
85
-
86
-
87
- results[:word_total] += 1
88
- scanner.next(:alphanum)
89
- end
90
- results
91
-
92
- end
93
-
94
-
95
- end
9
+ def analyze(scanner)
10
+
11
+ results = Statistic.new
12
+ results.hash = {
13
+ :long_words => [],
14
+ :zaimki => [],
15
+ :zaimki1 => [],
16
+ :zaimki2 => [],
17
+ :zaimki3 => [],
18
+ :przyimki => [],
19
+ :numbers => [],
20
+ :emotion => [],
21
+ :social => [],
22
+ :personal => [],
23
+ :posemotion => [],
24
+ :negemotion => [],
25
+ :wulgar => [],
26
+ :cognitive => []
27
+ }
28
+
29
+ while token = scanner.current
30
+ word = token.lemat
31
+
32
+ categories = @dictionary.find(word.gsub( /[^\w-]/, "" ))
33
+ unless categories.nil?
34
+ categories.each do |category|
35
+ puts "Znalazłem słowo #{word} : #{category} root: #{category.root}"
36
+ token.category = category
37
+ results.add(word,category)
38
+
39
+
40
+ results[:emotion].push token.orth if token.emotion?
41
+ results[:social].push token.orth if token.social?
42
+ results[:personal].push token.orth if token.personal?
43
+ results[:wulgar].push token.orth if token.bad_word?
44
+ results[:cognitive].push token.orth if token.cognitive?
45
+
46
+ results[:posemotion].push token.orth if token.positive_emotion?
47
+ results[:negemotion].push token.orth if token.negative_emotion?
48
+ end
49
+ end
50
+
51
+ #words longer than 10
52
+ results[:long_words].push word if word.jlength > 10
53
+ if token.zaimek?
54
+ results[:zaimki].push word
55
+
56
+ results[:zaimki1].push token.orth if word === 'ja' or word === 'my'
57
+ results[:zaimki2].push token.orth if word === 'ty' or word === 'wy'
58
+ results[:zaimki3].push token.orth if word === 'on'
59
+ end
60
+
61
+ results[:przyimki].push word if token.przyimek?
62
+ results[:numbers].push token.orth if token.number? or token.liczebnik?
63
+
64
+
65
+ results.total_words += 1
66
+ scanner.next(:alphanum)
67
+ end
68
+ results
69
+
70
+ end
71
+
72
+ end
96
73
 
97
74
  end
data/lib/liwc_category.rb CHANGED
@@ -1,62 +1,61 @@
1
1
  module NLP
2
2
 
3
- class LIWCCategory < Category
3
+ class LIWCCategory < Category
4
4
 
5
- #primary categories
6
-
7
- def linguistic?
8
- root == :PIERWOTNE
9
- end
10
-
11
- def psychological?
12
- root == :PROCESY_PSYCHOLOGICZNE
13
- end
5
+ #primary categories
14
6
 
15
-
16
- def relative?
17
- root === :RELATYWNOSC
18
- end
19
-
20
- def personal?
21
- root == :OSOBISTE
22
- end
7
+ def linguistic?
8
+ root == :PIERWOTNE
9
+ end
10
+
11
+ def psychological?
12
+ root == :PROCESY_PSYCHOLOGICZNE
13
+ end
14
+
15
+
16
+ def relative?
17
+ root === :RELATYWNOSC
18
+ end
19
+
20
+ def personal?
21
+ root == :OSOBISTE
22
+ end
23
23
 
24
- #second categories
25
-
26
- def emotion?
27
- path.include? 'EMOCJE'
24
+ #second categories
28
25
 
29
- end
26
+ def emotion?
27
+ path.include? 'EMOCJE'
30
28
 
31
- def positive_emotion?
32
- path.include? 'POZYTYWNE_EMOCJE'
33
-
34
- end
29
+ end
35
30
 
36
- def negative_emotion?
37
- path.include? 'NEGATYWNE_EMOCJE'
31
+ def positive_emotion?
32
+ path.include? 'POZYTYWNE_EMOCJE'
38
33
 
39
- end
34
+ end
40
35
 
41
- def cognitive?
42
- path.include? 'KOGNITYWNE_PROCESY'
36
+ def negative_emotion?
37
+ path.include? 'NEGATYWNE_EMOCJE'
43
38
 
44
- end
39
+ end
45
40
 
46
- def sense?
47
- path.include? 'ZMYSLY'
48
- end
41
+ def cognitive?
42
+ path.include? 'KOGNITYWNE_PROCESY'
49
43
 
50
- def social?
51
- path.include? 'SOCIAL'
44
+ end
52
45
 
53
- end
46
+ def sense?
47
+ path.include? 'ZMYSLY'
48
+ end
54
49
 
55
- def bad_word?
56
- path.include? 'WULGAR'
57
- end
50
+ def social?
51
+ path.include? 'SOCIAL'
58
52
 
53
+ end
59
54
 
60
-
55
+ def bad_word?
56
+ path.include? 'WULGAR'
61
57
  end
58
+
59
+
60
+ end
62
61
  end
data/lib/meaningable.rb CHANGED
@@ -1,76 +1,69 @@
1
1
  module Meaningable
2
2
 
3
-
3
+ #LIWC
4
+ #primary categories
4
5
 
5
- #LIWC
6
- #primary categories
7
-
8
- def linguistic?
9
- category.root == :PIERWOTNE
10
- end
11
-
12
- def psychological?
13
- category.root == :PROCESY_PSYCHOLOGICZNE
14
- end
6
+ def linguistic?
7
+ category.root == :PIERWOTNE
8
+ end
15
9
 
16
-
17
- def relative?
18
- category.root === :RELATYWNOSC
19
- end
20
-
21
- def personal?
22
- category.root == :OSOBISTE
23
- end
10
+ def psychological?
11
+ category.root == :PROCESY_PSYCHOLOGICZNE
12
+ end
24
13
 
25
- #second categories
26
-
27
- def emotion?
28
- category.path.include? 'EMOCJE'
29
14
 
30
- end
15
+ def relative?
16
+ category.root === :RELATYWNOSC
17
+ end
31
18
 
32
- def positive_emotion?
33
- category.path.include? 'POZYTYWNE_EMOCJE'
34
-
35
- end
19
+ def personal?
20
+ category.root == :OSOBISTE
21
+ end
36
22
 
37
- def negative_emotion?
38
- category.path.include? 'NEGATYWNE_EMOCJE'
23
+ #second categories
39
24
 
40
- end
25
+ def emotion?
26
+ category.path.include? 'EMOCJE'
41
27
 
42
- def cognitive?
43
- category.path.include? 'KOGNITYWNE_PROCESY'
28
+ end
44
29
 
45
- end
30
+ def positive_emotion?
31
+ category.path.include? 'POZYTYWNE_EMOCJE'
46
32
 
47
- def sense?
48
- category.path.include? 'ZMYSLY'
49
- end
33
+ end
50
34
 
51
- def social?
52
- category.path.include? 'SOCIAL'
35
+ def negative_emotion?
36
+ category.path.include? 'NEGATYWNE_EMOCJE'
53
37
 
54
- end
38
+ end
55
39
 
56
- def bad_word?
57
- category.path.include? 'WULGAR'
58
- end
40
+ def cognitive?
41
+ category.path.include? 'KOGNITYWNE_PROCESY'
59
42
 
43
+ end
60
44
 
45
+ def sense?
46
+ category.path.include? 'ZMYSLY'
47
+ end
61
48
 
49
+ def social?
50
+ category.path.include? 'SOCIAL'
62
51
 
52
+ end
63
53
 
64
- #SEMANTIC
65
- def synonym?(other)
66
-
67
- end
54
+ def bad_word?
55
+ category.path.include? 'WULGAR'
56
+ end
68
57
 
69
- def synonyms
70
58
 
71
- end
72
-
59
+ #SEMANTIC
60
+ def synonym?(other)
61
+
62
+ end
63
+
64
+ def synonyms
65
+
66
+ end
73
67
 
74
68
 
75
-
76
69
  end
data/lib/nlp.rb CHANGED
@@ -1,4 +1,14 @@
1
+ module NLP
2
+
3
+ TAKIPI_XML_FILE = "/tmp/output.xml"
4
+ DICTIONARY_CACHE_DIR = "~/"
5
+
6
+
7
+ end
8
+
9
+
1
10
  require 'stdlib/ext/array'
2
11
  require 'stdlib/ext/string.rb'
3
12
  require 'analyzer'
4
13
 
14
+
data/lib/rid_analyzer.rb CHANGED
@@ -1,74 +1,10 @@
1
1
  module NLP
2
- class RIDAnalyzer < NLP::Analyzer
3
-
4
-
5
- def initialize( category_file, restore = true )
6
- state_file = File.expand_path(Analyzer::CACHE_DIR+'.rid')
7
- if restore
8
- @dictionary = Dictionary.restore(state_file)
9
- else
10
- @dictionary = Dictionary.new
11
- @dictionary.load_categories( category_file, :rid => true )
12
- @dictionary.store(state_file)
13
- end
14
-
15
- end
16
-
17
-
18
- def analyze(scanner)
19
- results = {
20
- :word_count => 0,
21
- :word_total => 0,
22
- :scores => Hash.new { 0 },
23
- :words => [],
24
- :cwords => Hash.new { nil }
25
- }
26
-
27
- while token = scanner.current
28
- word = token.lemat
29
-
30
- categories = @dictionary.find( word.gsub( /[^\w-]/, "" ) )
31
- unless categories.nil?
32
- categories.each do |category|
33
- puts "Znalazłem słowo #{word} : #{category} root: #{category.root}"
34
- results[:scores][category] = results[:scores][category] + 1
35
- category = category.name
36
- if results[:cwords][category].nil?
37
- results[:cwords][category] = []
38
- end
39
- results[:cwords][category].push word
40
- results[:word_count] += 1
41
- results[:words].push word
42
- end
43
-
44
-
45
- end
46
-
47
- results[:word_total] += 1
48
- scanner.next(:word)
49
- end
50
-
51
- results[:sorted_scores] = results[:scores].to_a.sort_by { |result| -result[1] }
52
- p primary_sum = results[:sorted_scores].select { |result| result[0].primary? }.inject( 0 ) { |count,result| count + result[1] }
53
- p secondary_sum = results[:sorted_scores].select { |result| result[0].secondary? }.inject( 0 ) { |count,result| count + result[1] }
54
- p emotion_sum = results[:sorted_scores].select { |result| result[0].emotions? }.inject( 0 ) { |count,result| count + result[1] }
55
-
56
-
57
-
58
- results[:classes] = {
59
- :primary => Float(primary_sum) / results[:word_count],
60
- :secondary => Float(secondary_sum) / results[:word_count],
61
- :emotions => Float(emotion_sum) / results[:word_count]
62
- }
63
-
64
- results
65
- end
66
-
67
-
68
-
69
2
 
3
+ class RIDAnalyzer < Analyzer
70
4
 
5
+ def initialize
6
+ @dictionary = Dictionary.new(:rid)
71
7
  end
72
-
73
-
8
+
9
+ end
74
10
  end