opener-kaf-parser 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: efcd70b4613807928b75e36c34e96b8ec22737e1
4
+ data.tar.gz: 7fb60e7fb63d37192efd2e2938b2a503e712358d
5
+ SHA512:
6
+ metadata.gz: b920bcb157bc0d1c5ce592ea7da6c454b89b2842dc055712d662f3a3fd8cdd762ecc3f61a4cfc15cc26e11e87ee701ab8a116734e92d13996d7b5efc0fa03d7c
7
+ data.tar.gz: cb85a15b2dc234e5d9abe33222f64131cbb6491a40ea243b3178065d73f30cb050c350cc9ce064a6fa7ac2c85d4fbf27bd1f9341971e3e47a417da9ce5912753
data/LICENSE ADDED
@@ -0,0 +1,19 @@
1
+ Copyright (c) 2013, Olery
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in
11
+ all copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ THE SOFTWARE.
@@ -0,0 +1,72 @@
1
+ [![Build Status](https://drone.io/github.com/opener-project/ruby-kaf-parser/status.png)](https://drone.io/github.com/opener-project/ruby-kaf-parser/latest)
2
+
3
+ # Ruby KAF Parser
4
+
5
+ This repository contains the source code of the opener-kaf-parser, a simple and
6
+ fast KAF parser based on Nokogiri. The KAF parser is a stack based parser that
7
+ uses the SAX parsing API of Nokogiri, thus it should (in theory) be able to
8
+ handle large KAF files without too much trouble.
9
+
10
+ ## Usage
11
+
12
+ Create a parser instance and parse some KAF:
13
+
14
+ require 'opener/kaf_parser'
15
+
16
+ parser = Opener::KafParser::Parser.new
17
+ ast = parser.parse('...')
18
+
19
+ The return value is a tree of `Opener::KafParser::AST` nodes which behave like
20
+ S expressions (and are formatted that way when calling `#inspect` on them).
21
+ Currently there are 3 node types:
22
+
23
+ * document
24
+ * text
25
+ * opinion
26
+
27
+ The latter groups a set of text nodes together that make up the opinion.
28
+
29
+ To iterate over these nodes you'd do something along the lines of the
30
+ following:
31
+
32
+ ast.language # => "en"
33
+
34
+ ast.children.each do |node|
35
+ if node.type == :text
36
+ puts "Word: #{node.inspect}"
37
+ else
38
+ puts "Opinion: #{node.inspect}"
39
+ end
40
+ end
41
+
42
+ ## Presenting Text
43
+
44
+ To present an AST/text you can use one of the standard presenter classes. For
45
+ example, if you want to turn an AST into a regular Ruby String you can use the
46
+ Text presenter:
47
+
48
+ ast = parser.parse('...')
49
+ presenter = Opener::KafParser::Presenter::Text.new
50
+
51
+ puts presenter.present(ast) # => "Hello, you are doing great"
52
+
53
+ Currently the following presenters are available:
54
+
55
+ * `Opener::KafParser::Presenter::Text`
56
+ * `Opener::KafParser::Presenter::HTML`
57
+
58
+ ## Requirements
59
+
60
+ * Ruby 1.9.3 or newer
61
+ * libxml2 (newer versions of Nokogiri ship libxml themselves)
62
+
63
+ ## Installation:
64
+
65
+ Installing as a Gem:
66
+
67
+ gem install opener-kaf-parser
68
+
69
+ Using Bundler:
70
+
71
+ gem 'opener-kaf-parser',
72
+ :git => 'git@github.com:opener-project/ruby-kaf-parser'
@@ -0,0 +1,68 @@
1
/* Page-wide typography and a centred, width-limited layout. */
body {
    font-size: 14px;
    line-height: 1.6;
    margin: 0 auto;
    max-width: 960px;
}

/* Inline code inside paragraphs gets a light background. */
p code {
    background: #f2f2f2;
    padding-left: 3px;
    padding-right: 3px;
}

/* Code listings are set slightly smaller and tighter than body text. */
pre.code {
    font-size: 13px;
    line-height: 1.4;
}

/**
 * YARD uses generic table styles, using a special class means those tables
 * don't get messed up.
 */
.table {
    border: 1px solid #ccc;
    border-right: none;
    border-collapse: separate;
    border-spacing: 0;
    text-align: left;
}

.table.full {
    width: 100%;
}

.table .field_name {
    min-width: 160px;
}

.table thead tr th.no_sort:first-child {
    width: 25px;
}

/* Every cell draws its own bottom and right border. */
.table thead tr th,
.table tbody tr td {
    border-bottom: 1px solid #ccc;
    border-right: 1px solid #ccc;
    min-width: 20px;
    padding: 8px 5px;
    text-align: left;
    vertical-align: top;
}

/* The table's own border already closes the last row. */
.table tbody tr:last-child td {
    border-bottom: none;
}

/* Zebra striping for odd rows. */
.table tr:nth-child(odd) td {
    background: #f9f9f9;
}
@@ -0,0 +1,16 @@
1
+ require 'nokogiri'
2
+ require 'time'
3
+ require 'builder'
4
+
5
+ require_relative 'kaf_parser/version'
6
+
7
+ require_relative 'kaf_parser/ast/base'
8
+ require_relative 'kaf_parser/ast/document'
9
+ require_relative 'kaf_parser/ast/text'
10
+ require_relative 'kaf_parser/ast/opinion'
11
+
12
+ require_relative 'kaf_parser/sax_parser'
13
+ require_relative 'kaf_parser/parser'
14
+
15
+ require_relative 'kaf_parser/presenter/text'
16
+ require_relative 'kaf_parser/presenter/html'
@@ -0,0 +1,76 @@
1
module Opener
  module KafParser
    module AST
      ##
      # Shared superclass of all AST node classes. It stores the node type, an
      # optional value and the child nodes, and knows how to render itself as
      # an S expression.
      #
      # @!attribute [rw] type
      #  @return [Symbol]
      #
      # @!attribute [rw] value
      #  @return [String]
      #
      # @!attribute [rw] children
      #  @return [Array<Opener::KafParser::AST::Base>]
      #
      class Base
        attr_accessor :type, :value, :children

        ##
        # Assigns every attribute the node responds to, fills in the defaults
        # for `children` and `type` and finally runs the `after_initialize`
        # hook when a subclass defines one.
        #
        # @param [Hash] attributes
        #
        def initialize(attributes = {})
          attributes.each do |name, given|
            next unless respond_to?(name)

            instance_variable_set("@#{name}", given)
          end

          @children ||= []
          @type     ||= :generic

          after_initialize if respond_to?(:after_initialize)
        end

        ##
        # Formats the node (and, indented by two extra spaces per level, its
        # children) as an S expression.
        #
        # @param [Numeric] indent Amount of leading spaces for this node.
        # @return [String]
        #
        def inspect(indent = 0)
          nested = children.map { |child| child.inspect(indent + 2) }
          parts  = [(' ' * indent) + "(#{type}"]

          parts << value.inspect if value
          parts << ("\n" + nested.join("\n")) unless nested.empty?

          "#{parts.join(' ')})"
        end

        ##
        # Node specific attributes; the base class has none.
        #
        # @return [Hash]
        #
        def attributes
          {}
        end

        ##
        # @return [TrueClass|FalseClass]
        #
        def text?
          type == :text
        end

        ##
        # @return [TrueClass|FalseClass]
        #
        def opinion?
          type == :opinion
        end
      end # Base
    end # AST
  end # KafParser
end # Opener
@@ -0,0 +1,33 @@
1
module Opener
  module KafParser
    module AST
      ##
      # Root node of a parsed document: it carries the attributes read from
      # the `<KAF>` tag and holds every other node as a child.
      #
      # @!attribute [rw] language
      #  @return [String]
      #
      # @!attribute [rw] version
      #  @return [String]
      #
      class Document < Base
        attr_accessor :language, :version

        ##
        # Hook run by {Base#initialize}; forces the node type.
        #
        def after_initialize
          @type = :document
        end

        ##
        # @return [Hash] The language and version of the document.
        #
        def attributes
          {
            language: language,
            version: version
          }
        end
      end # Document
    end # AST
  end # KafParser
end # Opener
@@ -0,0 +1,52 @@
1
module Opener
  module KafParser
    module AST
      ##
      # Node describing an opinion: who holds it, what it targets, and its
      # polarity and strength. The nodes that make up the opinion expression
      # are stored in `children`.
      #
      # @!attribute [rw] id
      #  @return [String]
      #
      # @!attribute [rw] holder The nodes that make up the opinion holder.
      #  @return [Array]
      #
      # @!attribute [rw] target The nodes that make up the opinion target.
      #  @return [Array]
      #
      # @!attribute [rw] polarity
      #  @return [String]
      #
      # @!attribute [rw] strength
      #  @return [Numeric]
      #
      class Opinion < Base
        attr_accessor :id, :holder, :target, :polarity, :strength

        ##
        # Hook run by {Base#initialize}; forces the node type and makes sure
        # the holder and target lists always exist.
        #
        def after_initialize
          @holder ||= []
          @target ||= []

          @type = :opinion
        end

        ##
        # @return [Hash] All opinion specific attributes.
        #
        def attributes
          {
            id: id,
            holder: holder,
            target: target,
            polarity: polarity,
            strength: strength
          }
        end
      end # Opinion
    end # AST
  end # KafParser
end # Opener
@@ -0,0 +1,75 @@
1
module Opener
  module KafParser
    module AST
      ##
      # Node class that contains information about a set of characters such as
      # the polarity and POS.
      #
      # @!attribute [rw] id
      #  @return [Numeric]
      #
      # @!attribute [rw] sentence
      #  @return [Numeric]
      #
      # @!attribute [rw] paragraph
      #  @return [Numeric]
      #
      # @!attribute [rw] offset
      #  @return [Numeric]
      #
      # @!attribute [rw] length
      #  @return [Numeric]
      #
      # @!attribute [rw] word_type
      #  @return [String]
      #
      # @!attribute [rw] pos
      #  @return [String]
      #
      # @!attribute [rw] morphofeat
      #  @return [String]
      #
      # @!attribute [rw] sentiment_modifier
      #  @return [String]
      #
      # @!attribute [rw] polarity
      #  @return [String]
      #
      # @!attribute [rw] property
      #  @return [String]
      #
      class Text < Base
        attr_accessor :id, :sentence, :paragraph, :offset, :word_type, :pos,
          :morphofeat, :sentiment_modifier, :polarity, :property

        attr_writer :length

        ##
        # Called after a new instance of this class is created.
        #
        def after_initialize
          @type = :text

          # A node may be created before its value is known (the SAX parser
          # assigns the value once the element is closed), so only derive the
          # length from the value when there is one.
          @length ||= value.length if value
        end

        ##
        # The explicitly assigned length or, when none was given, the length
        # of the current value (0 when there is no value either).
        #
        # @return [Numeric]
        #
        def length
          return @length if @length

          value ? value.length : 0
        end

        ##
        # @return [Hash]
        #
        def attributes
          return {
            :id                 => id,
            :sentence           => sentence,
            :paragraph          => paragraph,
            :offset             => offset,
            :length             => length,
            :word_type          => word_type,
            :pos                => pos,
            :morphofeat         => morphofeat,
            :sentiment_modifier => sentiment_modifier,
            :polarity           => polarity,
            :property           => property
          }
        end
      end # Text
    end # AST
  end # KafParser
end # Opener
@@ -0,0 +1,25 @@
1
module Opener
  module KafParser
    ##
    # Convenience front-end for {Opener::KafParser::SaxParser}: it wires the
    # SAX handler into a Nokogiri SAX parser so callers only have to supply
    # the KAF input.
    #
    class Parser
      ##
      # Feeds the KAF/XML input through a fresh SAX handler and hands back the
      # document the handler built.
      #
      # @param [String] input The XML/KAF to parse.
      # @return [Opener::KafParser::AST::Document]
      #
      def parse(input)
        handler = SaxParser.new

        Nokogiri::XML::SAX::Parser.new(handler).parse(input)

        handler.document
      end
    end # Parser
  end # KafParser
end # Opener
@@ -0,0 +1,111 @@
1
module Opener
  module KafParser
    module Presenter
      ##
      # The HTML presenter takes an AST and turns it into a block of HTML where
      # each word is wrapped in a tag and has various meta information (e.g.
      # the polarity) assigned to it.
      #
      # Basic usage:
      #
      #     parser    = Opener::KafParser::Parser.new
      #     ast       = parser.parse('...')
      #     presenter = Opener::KafParser::Presenter::HTML.new
      #
      #     puts presenter.present(ast)
      #
      # ## Output
      #
      # The output is a set of span tags for each set of characters, span tags
      # for whitespace and a set of span tags that group opinion expressions.
      # Each span tag has a class indicating the type ("text", "opinion", etc)
      # and a set of `data-*` attributes containing data such as the polarity.
      # For example, the ID of a text node would be stored in `data-id`, the
      # polarity in `data-polarity` and so forth.
      #
      class HTML < Text
        ##
        # Markup emitted for every character of whitespace between two words.
        #
        # @return [String]
        #
        SPACE = '&nbsp;'

        ##
        # Attribute values are only written to the HTML when they are an
        # instance of one of these classes (or of a subclass).
        #
        # @return [Array]
        #
        TYPES_WHITELIST = [String, Numeric].freeze

        ##
        # Presents the AST as a collection of HTML tags.
        #
        # @param [Opener::KafParser::AST::Base] ast
        # @return [String]
        #
        def present(ast)
          builder = Builder::XmlMarkup.new

          render_ast(ast, 0, builder)

          return builder.target!
        end

        private

        ##
        # Renders all children of the given node. Text nodes are rendered
        # directly, any other node becomes a wrapping span whose content is
        # rendered recursively.
        #
        # @param [Opener::KafParser::AST::Base] ast
        # @param [Numeric] offset The offset directly after the last rendered
        #  text node.
        # @param [Builder::XmlMarkup] builder
        # @return [Numeric] The offset after the last text node rendered,
        #  including text nodes nested in child nodes.
        #
        def render_ast(ast, offset, builder)
          ast.children.each do |node|
            if node.text?
              offset = render_node(node, offset, builder)
            else
              render_span(node, builder) do
                # Carry the offset out of the nested node, otherwise the
                # whitespace before the next sibling is counted from the
                # position *before* the nested words.
                offset = render_ast(node, offset, builder)
              end
            end
          end

          return offset
        end

        ##
        # Renders a single text node, preceded by a whitespace span when there
        # is a gap between the previous offset and the node's own offset.
        #
        # @param [Opener::KafParser::AST::Text] node
        # @param [Numeric] offset
        # @param [Builder::XmlMarkup] builder
        # @return [Numeric] The offset directly after the node.
        #
        def render_node(node, offset, builder)
          diff = node.offset - offset

          if diff > 0
            builder.span(:class => 'whitespace') do |sub_builder|
              sub_builder << SPACE * diff
            end
          end

          render_span(node, builder)

          return calculate_offset(node)
        end

        ##
        # Writes a span for the node. When a block is given it is used to
        # render the content of the span.
        #
        # @param [Opener::KafParser::AST::Base] node
        # @param [Builder::XmlMarkup] builder
        #
        def render_span(node, builder)
          attrs = {'class' => node.type}

          # Only store simple values in the HTML attributes.
          node.attributes.each do |key, value|
            attrs["data-#{key}"] = value if simple_value?(value)
          end

          if block_given?
            builder.span(node.value, attrs) { |sub_builder| yield sub_builder }
          else
            builder.span(node.value, attrs)
          end
        end

        ##
        # Checks the value against {TYPES_WHITELIST}. `is_a?` is used instead
        # of comparing classes directly because the class of a number is a
        # subclass of Numeric (e.g. Integer or Float), never Numeric itself.
        #
        # @param [Mixed] value
        # @return [TrueClass|FalseClass]
        #
        def simple_value?(value)
          return TYPES_WHITELIST.any? { |type| value.is_a?(type) }
        end
      end # HTML
    end # Presenter
  end # KafParser
end # Opener
@@ -0,0 +1,69 @@
1
module Opener
  module KafParser
    module Presenter
      ##
      # The Text presenter class takes an AST and builds a plain Ruby string
      # containing the correct whitespace between various nodes.
      #
      class Text
        ##
        # Presents the AST as a plain Ruby String with no special formatting.
        #
        # @param [Opener::KafParser::AST::Base] ast
        # @return [String]
        #
        def present(ast)
          buffer = ''

          render_ast(ast, 0, buffer)

          return buffer
        end

        private

        ##
        # Appends all text nodes found in the children of the given node to
        # the buffer, descending into non-text nodes.
        #
        # @param [Opener::KafParser::AST::Base] ast
        # @param [Numeric] offset The offset directly after the last rendered
        #  text node.
        # @param [String] buffer
        # @return [Numeric] The offset after the last text node rendered,
        #  including text nodes nested in child nodes.
        #
        def render_ast(ast, offset, buffer)
          ast.children.each do |node|
            # Both branches hand back the new offset. Without taking it over
            # from the nested call, the whitespace in front of the word that
            # follows a nested node would be counted from the position before
            # that node.
            if node.text?
              offset = render_node(node, offset, buffer)
            else
              offset = render_ast(node, offset, buffer)
            end
          end

          return offset
        end

        ##
        # Appends a single text node to the buffer, padding with spaces when
        # the node starts after the current offset.
        #
        # @param [Opener::KafParser::AST::Text] node
        # @param [Numeric] offset
        # @param [String] buffer
        # @return [Numeric] The offset directly after the node.
        #
        def render_node(node, offset, buffer)
          diff = node.offset - offset

          buffer << ' ' * diff if diff > 0
          buffer << node.value

          return calculate_offset(node)
        end

        ##
        # @param [Opener::KafParser::AST::Text] node
        # @return [Numeric] The offset of the first character after the node.
        #
        def calculate_offset(node)
          return node.offset + node.length
        end
      end # Text
    end # Presenter
  end # KafParser
end # Opener
@@ -0,0 +1,351 @@
1
module Opener
  module KafParser
    ##
    # The SaxParser class is a Nokogiri SAX parser that builds a tree of
    # {Opener::KafParser::AST::Base} nodes containing word information such as
    # the polarity and Part Of Speech as well as grouping words together based
    # on the opinion expression they belong to.
    #
    # This SAX parser is a stack based parser and parses only relevant
    # information of KAF documents. For example, the `<head>` of a KAF document
    # is completely ignored.
    #
    # Element names are translated into callback names: `on_NAME` is called
    # when an element is opened and `after_NAME` when it is closed. Only
    # public methods are considered, elements without a callback are skipped.
    #
    # @!attribute [r] document
    #  @return [Opener::KafParser::AST::Document]
    #
    class SaxParser < Nokogiri::XML::SAX::Document
      attr_reader :document

      ##
      # @see Nokogiri::XML::SAX::Document#initialize
      #
      def initialize(*args)
        super

        @stack             = []    # nodes that are currently being built
        @attributes        = []    # attribute hashes of the open term/property
        @document          = nil   # set once the `<KAF>` element is closed
        @characters        = ''    # text collected for the open `<wf>`
        @targets           = []    # IDs collected from `<target>` elements
        @buffer_characters = false
        @buffer_targets    = false
        @word_mapping      = {}    # word ID => AST::Text
        @term_mapping      = {}    # term ID => AST::Text
      end

      ##
      # Called at the start of an XML element. This method delegates the work
      # to individual method calls based on the node name.
      #
      # @param [String] name The name of the element.
      # @param [Array] attributes
      #
      def start_element(name, attributes)
        callback   = 'on_' + callback_name(name)
        attributes = associate_attributes(attributes)

        execute_callback(callback, attributes)
      end

      ##
      # Called at the end of an XML element, delegates to `after_NAME`.
      #
      # @param [String] name The name of the element.
      #
      def end_element(name)
        callback = 'after_' + callback_name(name)

        execute_callback(callback)
      end

      ##
      # Processes the characters of an XML node. Characters are only kept
      # while a `<wf>` element is open.
      #
      # @param [String] text
      #
      def characters(text)
        @characters << text if @buffer_characters
      end

      ##
      # Processes a `<KAF>` node.
      #
      # @param [Hash] attr
      #
      def on_kaf(attr)
        @stack << AST::Document.new(
          :language => attr.fetch('xml:lang', 'en'),
          :version  => attr['version']
        )
      end

      ##
      # @see #on_kaf
      #
      def after_kaf
        @document = @stack.pop
      end

      ##
      # Processes a `<wf>` node.
      #
      # @param [Hash] attr
      #
      def on_wf(attr)
        @stack << AST::Text.new(
          :id        => attr['wid'],
          :sentence  => attr['sent'].to_i,
          :offset    => attr['offset'].to_i,
          :length    => attr['length'].to_i,
          :paragraph => attr['para'].to_i
        )

        @buffer_characters = true
      end

      ##
      # @see #on_wf
      #
      def after_wf
        wf       = @stack.pop
        wf.value = @characters

        current_object.children << wf

        @word_mapping[wf.id] = wf

        reset_character_buffer
      end

      ##
      # Processes a `<term>` node.
      #
      # @param [Hash] attr
      #
      def on_term(attr)
        @attributes << attr

        @buffer_targets = true
      end

      ##
      # Copies the term (and, when present, sentiment) attributes onto every
      # word the term refers to. Targets that do not refer to a known word are
      # skipped.
      #
      # @see #on_term
      #
      def after_term
        # The first entry holds the attributes of the term itself, the
        # second one (if any) those of its `<sentiment>` element.
        attrs, sentiment = @attributes

        @targets.each do |target|
          word = @word_mapping[target]

          next unless word

          word.morphofeat = attrs['morphofeat']
          word.word_type  = attrs['type']
          word.pos        = attrs['pos']

          if sentiment
            word.sentiment_modifier = sentiment['sentiment_modifier']
            word.polarity           = sentiment['polarity']
          end

          # Map the term IDs to the word form node.
          @term_mapping[attrs['tid']] = word
        end

        reset_target_buffer
        reset_attributes_buffer
      end

      ##
      # Processes a `<target>` node.
      #
      # @param [Hash] attr
      #
      def on_target(attr)
        @targets << attr['id'] if @buffer_targets
      end

      ##
      # Processes a `<sentiment>` node.
      #
      # @param [Hash] attr
      #
      def on_sentiment(attr)
        @attributes << attr
      end

      ##
      # Processes a `<opinion>` node.
      #
      # @param [Hash] attr
      #
      def on_opinion(attr)
        @stack << AST::Opinion.new(:id => attr['oid'])
      end

      ##
      # Moves the opinion into the parent node, at the position of the first
      # word of the opinion expression, and removes the words of the
      # expression from the parent since they now live inside the opinion.
      #
      # @see #on_opinion
      #
      def after_opinion
        opinion  = @stack.pop
        siblings = current_object.children

        moved = opinion.children.each_with_object({}) do |node, hash|
          hash[node.id] = true
        end

        # Insert the opinion node before the first node of the expression.
        # Opinions without an expression, or whose first word is not a
        # direct child of the parent, are appended instead.
        first_index = siblings.index(opinion.children.first) || siblings.length

        siblings.insert(first_index, opinion)

        # Remove the word nodes from the parent in a single pass. Deleting
        # elements while iterating with #each would skip the element that
        # follows every deleted one.
        siblings.reject! do |node|
          node.is_a?(AST::Text) && moved.key?(node.id)
        end
      end

      ##
      # Processes an `<opinion_holder>` node.
      #
      # @param [Hash] attr
      #
      def on_opinion_holder(attr)
        @buffer_targets = true
      end

      ##
      # @see #on_opinion_holder
      #
      def after_opinion_holder
        current_object.holder.concat(buffered_terms)

        reset_target_buffer
      end

      ##
      # Processes an `<opinion_target>` node.
      #
      # @param [Hash] attr
      #
      def on_opinion_target(attr)
        @buffer_targets = true
      end

      ##
      # @see #on_opinion_target
      #
      def after_opinion_target
        current_object.target.concat(buffered_terms)

        reset_target_buffer
      end

      ##
      # Processes an `<opinion_expression>` node.
      #
      # @param [Hash] attr
      #
      def on_opinion_expression(attr)
        current_object.polarity = attr['polarity']
        current_object.strength = attr['strength'].to_i

        @buffer_targets = true
      end

      ##
      # @see #on_opinion_expression
      #
      def after_opinion_expression
        current_object.children.concat(buffered_terms)

        reset_target_buffer
      end

      ##
      # Processes a `<property>` node.
      #
      # @param [Hash] attr
      #
      def on_property(attr)
        @attributes << attr

        @buffer_targets = true
      end

      ##
      # Stores the lemma of the property on every word it refers to.
      #
      # @see #on_property
      #
      def after_property
        attrs = @attributes.pop

        buffered_terms.each do |word|
          word.property = attrs['lemma']
        end

        reset_attributes_buffer
        reset_target_buffer
      end

      private

      ##
      # Returns a callback name for the given XML node name: an underscore is
      # inserted in front of every run of uppercase characters that follows
      # other characters and the result is lowercased.
      #
      # @param [String] name
      # @return [String]
      #
      def callback_name(name)
        return name.gsub(/([^A-Z]+)([A-Z]+)/, '\\1_\\2').downcase
      end

      ##
      # Calls the given method if this object publicly responds to it.
      #
      # @param [String] name
      # @param [Array] args
      #
      def execute_callback(name, *args)
        send(name, *args) if respond_to?(name)
      end

      ##
      # Converts an Array of attributes into a Hash.
      #
      # @param [Array] attributes
      # @return [Hash]
      #
      def associate_attributes(attributes)
        return attributes.each_with_object({}) do |pair, hash|
          hash[pair[0]] = pair[1]
        end
      end

      ##
      # @return [Mixed] The node that is currently being built.
      #
      def current_object
        return @stack.last
      end

      ##
      # Returns the word nodes for the buffered target (term) IDs. IDs that do
      # not belong to a known term are left out so that no nil values end up
      # in the AST.
      #
      # @return [Array<Opener::KafParser::AST::Text>]
      #
      def buffered_terms
        return @targets.map { |target| @term_mapping[target] }.compact
      end

      ##
      # Resets the character buffer and disables buffering.
      #
      def reset_character_buffer
        @buffer_characters = false
        @characters        = ''
      end

      ##
      # Resets the target buffer and disables buffering.
      #
      def reset_target_buffer
        @buffer_targets = false
        @targets        = []
      end

      ##
      # Resets the attributes buffer.
      #
      def reset_attributes_buffer
        @attributes = []
      end
    end # SaxParser
  end # KafParser
end # Opener
@@ -0,0 +1,5 @@
1
module Opener
  module KafParser
    ##
    # Version of the opener-kaf-parser Gem; the Gem specification reads its
    # version from this constant.
    #
    # @return [String]
    #
    VERSION = '1.0.0'
  end # KafParser
end # Opener
@@ -0,0 +1,29 @@
1
require File.expand_path('../lib/opener/kaf_parser/version', __FILE__)

# Gem specification of opener-kaf-parser. The version is taken from
# Opener::KafParser::VERSION so it only has to be changed in one place.
Gem::Specification.new do |gem|
  gem.name        = 'opener-kaf-parser'
  gem.version     = Opener::KafParser::VERSION

  # Names and e-mail addresses are separate fields of the specification.
  gem.authors     = ['Yorick Peterse']
  gem.email       = ['yorickpeterse@olery.com']

  gem.summary     = 'A KAF parser written in Ruby.'
  gem.description = gem.summary
  gem.homepage    = 'https://github.com/opener-project/ruby-kaf-parser'

  # The bundled LICENSE file contains the text of the MIT license.
  gem.license     = 'MIT'

  # NOTE: `has_rdoc` is no longer set, RubyGems has deprecated the attribute.

  gem.required_ruby_version = '>= 1.9.3'

  # Package regular files only, Dir.glob also returns directories.
  gem.files = Dir.glob([
    'doc/**/*',
    'lib/**/*',
    'LICENSE',
    '*.gemspec',
    'README.md'
  ]).select { |file| File.file?(file) }

  gem.add_dependency 'nokogiri'
  gem.add_dependency 'builder'

  gem.add_development_dependency 'rspec'
  gem.add_development_dependency 'rake'
  gem.add_development_dependency 'simplecov'
  gem.add_development_dependency 'yard'
  gem.add_development_dependency 'redcarpet', ['>= 2.0']
end
metadata ADDED
@@ -0,0 +1,155 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: opener-kaf-parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Yorick Peterse <yorickpeterse@olery.com>
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-05-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: builder
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: simplecov
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: yard
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: redcarpet
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '2.0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '2.0'
111
+ description: A KAF parser written in Ruby.
112
+ email:
113
+ executables: []
114
+ extensions: []
115
+ extra_rdoc_files: []
116
+ files:
117
+ - LICENSE
118
+ - README.md
119
+ - doc/css/common.css
120
+ - lib/opener/kaf_parser.rb
121
+ - lib/opener/kaf_parser/ast/base.rb
122
+ - lib/opener/kaf_parser/ast/document.rb
123
+ - lib/opener/kaf_parser/ast/opinion.rb
124
+ - lib/opener/kaf_parser/ast/text.rb
125
+ - lib/opener/kaf_parser/parser.rb
126
+ - lib/opener/kaf_parser/presenter/html.rb
127
+ - lib/opener/kaf_parser/presenter/text.rb
128
+ - lib/opener/kaf_parser/sax_parser.rb
129
+ - lib/opener/kaf_parser/version.rb
130
+ - opener-kaf-parser.gemspec
131
+ homepage:
132
+ licenses: []
133
+ metadata: {}
134
+ post_install_message:
135
+ rdoc_options: []
136
+ require_paths:
137
+ - lib
138
+ required_ruby_version: !ruby/object:Gem::Requirement
139
+ requirements:
140
+ - - ">="
141
+ - !ruby/object:Gem::Version
142
+ version: 1.9.3
143
+ required_rubygems_version: !ruby/object:Gem::Requirement
144
+ requirements:
145
+ - - ">="
146
+ - !ruby/object:Gem::Version
147
+ version: '0'
148
+ requirements: []
149
+ rubyforge_project:
150
+ rubygems_version: 2.2.2
151
+ signing_key:
152
+ specification_version: 4
153
+ summary: A KAF parser written in Ruby.
154
+ test_files: []
155
+ has_rdoc: yard