opener-kaf-parser 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: efcd70b4613807928b75e36c34e96b8ec22737e1
4
+ data.tar.gz: 7fb60e7fb63d37192efd2e2938b2a503e712358d
5
+ SHA512:
6
+ metadata.gz: b920bcb157bc0d1c5ce592ea7da6c454b89b2842dc055712d662f3a3fd8cdd762ecc3f61a4cfc15cc26e11e87ee701ab8a116734e92d13996d7b5efc0fa03d7c
7
+ data.tar.gz: cb85a15b2dc234e5d9abe33222f64131cbb6491a40ea243b3178065d73f30cb050c350cc9ce064a6fa7ac2c85d4fbf27bd1f9341971e3e47a417da9ce5912753
data/LICENSE ADDED
@@ -0,0 +1,19 @@
1
+ Copyright (c) 2013, Olery
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in
11
+ all copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ THE SOFTWARE.
@@ -0,0 +1,72 @@
1
+ [![Build Status](https://drone.io/github.com/opener-project/ruby-kaf-parser/status.png)](https://drone.io/github.com/opener-project/ruby-kaf-parser/latest)
2
+
3
+ # Ruby KAF Parser
4
+
5
+ This repository contains the source code of the opener-kaf-parser, a simple and
6
+ fast KAF parser based on Nokogiri. The KAF parser is a stack based parser that
7
+ uses the SAX parsing API of Nokogiri, thus it should (in theory) be able to
8
+ handle large KAF files without too much trouble.
9
+
10
+ ## Usage
11
+
12
+ Create a parser instance and parse some KAF:
13
+
14
+ require 'opener/kaf_parser'
15
+
16
+ parser = Opener::KafParser::Parser.new
17
+ ast = parser.parse('...')
18
+
19
+ The return value is a tree of `Opener::KafParser::AST` nodes which behave like
20
+ S expressions (and are formatted that way when calling `#inspect` on them).
21
+ Currently there are 3 node types:
22
+
23
+ * document
24
+ * text
25
+ * opinion
26
+
27
+ The latter groups a set of text nodes together that make up the opinion.
28
+
29
+ To iterate over these nodes you'd do something along the lines of the
30
+ following:
31
+
32
+ ast.language # => "en"
33
+
34
+ ast.children.each do |node|
35
+ if node.type == :text
36
+ puts "Word: #{node.inspect}"
37
+ else
38
+ puts "Opinion: #{node.inspect}"
39
+ end
40
+ end
41
+
42
+ ## Presenting Text
43
+
44
+ To present an AST/text you can use one of the standard presenter classes. For
45
+ example, if you want to turn an AST into a regular Ruby String you can use the
46
+ Text presenter:
47
+
48
+ ast = parser.parse('...')
49
+ presenter = Opener::KafParser::Presenter::Text.new
50
+
51
+ puts presenter.present(ast) # => "Hello, you are doing great"
52
+
53
+ Currently the following presenters are available:
54
+
55
+ * `Opener::KafParser::Presenter::Text`
56
+ * `Opener::KafParser::Presenter::HTML`
57
+
58
+ ## Requirements
59
+
60
+ * Ruby 1.9.3 or newer
61
+ * libxml2 (newer versions of Nokogiri ship libxml themselves)
62
+
63
+ ## Installation:
64
+
65
+ Installing as a Gem:
66
+
67
+ gem install opener-kaf-parser
68
+
69
+ Using Bundler:
70
+
71
+ gem 'opener-kaf-parser',
72
+ :git => 'git@github.com:opener-project/ruby-kaf-parser'
@@ -0,0 +1,68 @@
1
/* Base page layout. */
body
{
    font-size: 14px;
    line-height: 1.6;
    margin: 0 auto;
    max-width: 960px;
}

/* Inline code inside paragraphs. */
p code
{
    background: #f2f2f2;
    padding-left: 3px;
    padding-right: 3px;
}

/* Code listings. */
pre.code
{
    font-size: 13px;
    line-height: 1.4;
}

/**
 * YARD uses generic table styles, using a special class means those tables
 * don't get messed up.
 */
.table
{
    border: 1px solid #ccc;
    border-right: none;
    border-collapse: separate;
    border-spacing: 0;
    text-align: left;
}

.table.full
{
    width: 100%;
}

.table .field_name
{
    min-width: 160px;
}

.table thead tr th.no_sort:first-child
{
    width: 25px;
}

.table thead tr th, .table tbody tr td
{
    border-bottom: 1px solid #ccc;
    border-right: 1px solid #ccc;
    min-width: 20px;
    padding: 8px 5px;
    text-align: left;
    vertical-align: top;
}

.table tbody tr:last-child td
{
    border-bottom: none;
}

.table tr:nth-child(odd) td
{
    background: #f9f9f9;
}
@@ -0,0 +1,16 @@
1
+ require 'nokogiri'
2
+ require 'time'
3
+ require 'builder'
4
+
5
+ require_relative 'kaf_parser/version'
6
+
7
+ require_relative 'kaf_parser/ast/base'
8
+ require_relative 'kaf_parser/ast/document'
9
+ require_relative 'kaf_parser/ast/text'
10
+ require_relative 'kaf_parser/ast/opinion'
11
+
12
+ require_relative 'kaf_parser/sax_parser'
13
+ require_relative 'kaf_parser/parser'
14
+
15
+ require_relative 'kaf_parser/presenter/text'
16
+ require_relative 'kaf_parser/presenter/html'
@@ -0,0 +1,76 @@
1
module Opener
  module KafParser
    module AST
      ##
      # Base node class that provides some common boilerplate for the various
      # other node classes.
      #
      # @!attribute [rw] type
      #  @return [Symbol]
      #
      # @!attribute [rw] value
      #  @return [String]
      #
      # @!attribute [rw] children
      #  @return [Array<Opener::KafParser::AST::Base>]
      #
      class Base
        attr_accessor :type, :value, :children

        ##
        # Stores every attribute for which the node responds to a method of
        # the same name; other keys are ignored. `children` defaults to an
        # empty Array and `type` to `:generic`. Subclasses can hook into the
        # construction by defining a public `after_initialize` method.
        #
        # @param [Hash] attributes
        #
        def initialize(attributes = {})
          attributes.each do |key, value|
            instance_variable_set("@#{key}", value) if respond_to?(key)
          end

          @children ||= []
          @type     ||= :generic

          after_initialize if respond_to?(:after_initialize)
        end

        ##
        # Formats the node and its children as an S expression. Child nodes
        # are placed on their own lines, indented two spaces deeper than their
        # parent.
        #
        # @param [Numeric] indent The amount of leading spaces.
        # @return [String]
        #
        def inspect(indent = 0)
          spaces       = ' ' * indent
          child_values = children.map { |child| child.inspect(indent + 2) }
          segments     = ["#{spaces}(#{type}"]

          segments << value.inspect if value

          unless child_values.empty?
            segments << "\n#{child_values.join("\n")}"
          end

          return segments.join(' ') + ')'
        end

        ##
        # Returns the extra attributes of the node. The base class has none,
        # subclasses override this method.
        #
        # @return [Hash]
        #
        def attributes
          return {}
        end

        ##
        # @return [TrueClass|FalseClass]
        #
        def text?
          return type == :text
        end

        ##
        # @return [TrueClass|FalseClass]
        #
        def opinion?
          return type == :opinion
        end
      end # Base
    end # AST
  end # KafParser
end # Opener
@@ -0,0 +1,33 @@
1
module Opener
  module KafParser
    module AST
      ##
      # The Document node class contains information about a `<KAF>` tag and
      # all the child nodes.
      #
      # @!attribute [rw] language
      #  @return [String]
      #
      # @!attribute [rw] version
      #  @return [String]
      #
      class Document < Base
        attr_accessor :language, :version

        ##
        # Called after a new instance of this class is created. Forces the
        # node type to `:document`.
        #
        def after_initialize
          @type = :document
        end

        ##
        # @return [Hash] The language and version of the document.
        #
        def attributes
          return {:language => language, :version => version}
        end
      end # Document
    end # AST
  end # KafParser
end # Opener
@@ -0,0 +1,52 @@
1
module Opener
  module KafParser
    module AST
      ##
      # The Opinion node class contains information about an opinion, the
      # expression, polarity and more. The nodes that make up the expression of
      # the opinion are stored in the `children` method.
      #
      # @!attribute [rw] id
      #  @return [String]
      #
      # @!attribute [rw] holder The nodes that make up the opinion holder.
      #  @return [Array]
      #
      # @!attribute [rw] target The nodes that make up the opinion target.
      #  @return [Array]
      #
      # @!attribute [rw] polarity
      #  @return [String]
      #
      # @!attribute [rw] strength
      #  @return [Numeric]
      #
      class Opinion < Base
        attr_accessor :id, :holder, :target, :polarity, :strength

        ##
        # Called after a new instance of this class is created. Forces the
        # node type to `:opinion` and defaults the holder and target lists to
        # empty Arrays.
        #
        def after_initialize
          @type = :opinion

          @holder ||= []
          @target ||= []
        end

        ##
        # @return [Hash]
        #
        def attributes
          return {
            :id       => id,
            :holder   => holder,
            :target   => target,
            :polarity => polarity,
            :strength => strength
          }
        end
      end # Opinion
    end # AST
  end # KafParser
end # Opener
@@ -0,0 +1,75 @@
1
module Opener
  module KafParser
    module AST
      ##
      # Node class that contains information about a set of characters such as
      # the polarity and POS.
      #
      # @!attribute [rw] id
      #  @return [Numeric]
      #
      # @!attribute [rw] sentence
      #  @return [Numeric]
      #
      # @!attribute [rw] paragraph
      #  @return [Numeric]
      #
      # @!attribute [rw] offset
      #  @return [Numeric]
      #
      # @!attribute [rw] length
      #  @return [Numeric]
      #
      # @!attribute [rw] word_type
      #  @return [String]
      #
      # @!attribute [rw] pos
      #  @return [String]
      #
      # @!attribute [rw] morphofeat
      #  @return [String]
      #
      # @!attribute [rw] sentiment_modifier
      #  @return [String]
      #
      # @!attribute [rw] polarity
      #  @return [String]
      #
      # @!attribute [rw] property
      #  @return [String]
      #
      class Text < Base
        attr_accessor :id, :sentence, :paragraph, :offset, :length, :word_type,
          :pos, :morphofeat, :sentiment_modifier, :polarity, :property

        ##
        # Called after a new instance of this class is created. Forces the
        # node type to `:text` and, when no explicit length was given, derives
        # the length from the value.
        #
        def after_initialize
          @type = :text

          # The value may still be nil at this point (it can be assigned
          # after construction), `to_s` makes that case yield a length of 0
          # instead of raising NoMethodError.
          @length ||= value.to_s.length
        end

        ##
        # @return [Hash]
        #
        def attributes
          return {
            :id                 => id,
            :sentence           => sentence,
            :paragraph          => paragraph,
            :offset             => offset,
            :length             => length,
            :word_type          => word_type,
            :pos                => pos,
            :morphofeat         => morphofeat,
            :sentiment_modifier => sentiment_modifier,
            :polarity           => polarity,
            :property           => property
          }
        end
      end # Text
    end # AST
  end # KafParser
end # Opener
@@ -0,0 +1,25 @@
1
module Opener
  module KafParser
    ##
    # The Parser class acts as a slightly more user friendly interface around
    # the Nokogiri SAX based parser.
    #
    class Parser
      ##
      # Parses the input KAF/XML and returns an instance of
      # {Opener::KafParser::AST::Document}.
      #
      # A fresh {Opener::KafParser::SaxParser} is created for every call, the
      # resulting document is whatever that handler exposes via `#document`
      # once Nokogiri has processed the input.
      #
      # @param [String] input The XML/KAF to parse.
      # @return [Opener::KafParser::AST::Document]
      #
      def parse(input)
        sax_parser      = SaxParser.new
        nokogiri_parser = Nokogiri::XML::SAX::Parser.new(sax_parser)

        nokogiri_parser.parse(input)

        return sax_parser.document
      end
    end # Parser
  end # KafParser
end # Opener
@@ -0,0 +1,111 @@
1
module Opener
  module KafParser
    module Presenter
      ##
      # The HTML presenter takes an AST and turns it into a block of HTML where
      # each word is wrapped in a tag and has various meta information (e.g.
      # the polarity) assigned to it.
      #
      # Basic usage:
      #
      #     parser    = Opener::KafParser::Parser.new
      #     ast       = parser.parse('...')
      #     presenter = Opener::KafParser::Presenter::HTML.new
      #
      #     puts presenter.present(ast)
      #
      # ## Output
      #
      # The output is a set of span tags for each set of characters, span tags
      # for whitespace and a set of span tags that group opinion expressions.
      # Each span tag has a class indicating the type ("text", "opinion", etc)
      # and a set of `data-*` attributes containing data such as the polarity.
      # For example, the ID of a text node would be stored in `data-id`, the
      # polarity in `data-polarity` and so forth.
      #
      class HTML < Text
        ##
        # The markup used for a single whitespace character.
        #
        # @return [String]
        #
        SPACE = '&nbsp;'

        ##
        # The classes of attribute values that may be stored in `data-*`
        # attributes.
        #
        # @return [Array]
        #
        TYPES_WHITELIST = [String, Numeric]

        ##
        # Presents the AST as a collection of HTML tags.
        #
        # @param [Opener::KafParser::AST::Base] ast
        # @return [String]
        #
        def present(ast)
          builder = Builder::XmlMarkup.new

          render_ast(ast, 0, builder)

          return builder.target!
        end

        private

        ##
        # Renders all the child nodes of the given node. Text nodes are
        # rendered directly, other nodes are rendered as a span that wraps
        # their own children.
        #
        # @param [Opener::KafParser::AST::Base] ast
        # @param [Numeric] offset
        # @param [Builder::XmlMarkup] builder
        # @return [Numeric] The offset following the last rendered text node.
        #
        def render_ast(ast, offset, builder)
          ast.children.each do |node|
            if node.text?
              offset = render_node(node, offset, builder)
            else
              render_span(node, builder) do
                # Carry the offset reached inside the nested node back out,
                # otherwise the whitespace before the next sibling would be
                # calculated from a stale position.
                offset = render_ast(node, offset, builder)
              end
            end
          end

          return offset
        end

        ##
        # Renders a text node, preceded by a whitespace span when there is a
        # gap between the current offset and the offset of the node.
        #
        # @see #render_ast
        #
        def render_node(node, offset, builder)
          diff = node.offset - offset

          if diff > 0
            builder.span(:class => 'whitespace') do |sub_builder|
              sub_builder << SPACE * diff
            end
          end

          render_span(node, builder)

          return calculate_offset(node)
        end

        ##
        # Renders a span tag for the node. When a block is given it is called
        # to render the contents of the span.
        #
        # @param [Opener::KafParser::AST::Base] node
        # @param [Builder::XmlMarkup] builder
        #
        def render_span(node, builder)
          attrs = {'class' => node.type}

          # Only store simple values in the HTML attributes. `is_a?` is used
          # instead of comparing classes directly since the class of a number
          # is a subclass of Numeric (e.g. Integer or Float), never Numeric
          # itself.
          node.attributes.each do |key, value|
            if TYPES_WHITELIST.any? { |type| value.is_a?(type) }
              attrs["data-#{key}"] = value
            end
          end

          if block_given?
            builder.span(node.value, attrs) { |sub_builder| yield sub_builder }
          else
            builder.span(node.value, attrs)
          end
        end
      end # HTML
    end # Presenter
  end # KafParser
end # Opener
@@ -0,0 +1,69 @@
1
module Opener
  module KafParser
    module Presenter
      ##
      # The Text presenter class takes an AST and builds a plain Ruby string
      # containing the correct whitespace between various nodes.
      #
      class Text
        ##
        # Presents the AST as a plain Ruby String with no special formatting.
        #
        # @param [Opener::KafParser::AST::Base] ast
        # @return [String]
        #
        def present(ast)
          buffer = ''

          render_ast(ast, 0, buffer)

          return buffer
        end

        private

        ##
        # Renders all the child nodes of the given node into the buffer,
        # recursing into nodes that are not text nodes.
        #
        # @param [Opener::KafParser::AST::Base] ast
        # @param [Numeric] offset
        # @param [String] buffer
        # @return [Numeric] The offset following the last rendered text node.
        #
        def render_ast(ast, offset, buffer)
          ast.children.each do |node|
            if node.text?
              offset = render_node(node, offset, buffer)
            else
              # Carry the offset reached inside the nested node back out,
              # otherwise the whitespace before the next sibling would be
              # calculated from a stale position.
              offset = render_ast(node, offset, buffer)
            end
          end

          return offset
        end

        ##
        # Appends the value of a text node to the buffer, preceded by one
        # space for every position between the current offset and the offset
        # of the node.
        #
        # @param [Opener::KafParser::AST::Text] node
        # @param [Numeric] offset
        # @param [String] buffer
        # @return [Numeric]
        #
        def render_node(node, offset, buffer)
          diff = node.offset - offset

          buffer << ' ' * diff if diff > 0
          buffer << node.value

          return calculate_offset(node)
        end

        ##
        # Returns the offset directly following the given node.
        #
        # @param [Opener::KafParser::AST::Text] node
        # @return [Numeric]
        #
        def calculate_offset(node)
          return node.offset + node.length
        end
      end # Text
    end # Presenter
  end # KafParser
end # Opener
@@ -0,0 +1,351 @@
1
module Opener
  module KafParser
    ##
    # The SaxParser class is a Nokogiri SAX parser that builds a list of
    # {Opener::KafParser::AST::Base} nodes containing word information such as
    # the polarity and Part Of Speech as well as grouping words together based
    # on the opinion expression they belong to.
    #
    # This SAX parser is a stack based parser and parses only relevant
    # information of KAF documents. For example, the `<head>` of a KAF document
    # is completely ignored.
    #
    # @!attribute [r] document
    #  @return [Opener::KafParser::AST::Document]
    #
    class SaxParser < Nokogiri::XML::SAX::Document
      attr_reader :document

      ##
      # @see Nokogiri::XML::SAX::Document#initialize
      #
      def initialize(*args)
        super

        @stack             = []
        @attributes        = []
        @document          = nil
        @characters        = ''
        @targets           = []
        @buffer_characters = false
        @buffer_targets    = false
        @word_mapping      = {}
        @term_mapping      = {}
      end

      ##
      # Called at the start of an XML element. This method delegates the work
      # to individual method calls based on the node name.
      #
      # @param [String] name The name of the element.
      # @param [Array] attributes
      #
      def start_element(name, attributes)
        callback   = 'on_' + callback_name(name)
        attributes = associate_attributes(attributes)

        execute_callback(callback, attributes)
      end

      ##
      # Called at the end of an XML element, delegates to the matching
      # `after_*` method if there is one.
      #
      # @param [String] name The name of the element.
      #
      def end_element(name)
        callback = 'after_' + callback_name(name)

        execute_callback(callback)
      end

      ##
      # Processes the characters of an XML node. Characters are only kept
      # while character buffering is enabled.
      #
      # @param [String] text
      #
      def characters(text)
        @characters << text if @buffer_characters
      end

      ##
      # Processes a `<KAF>` node.
      #
      # @param [Hash] attr
      #
      def on_kaf(attr)
        @stack << AST::Document.new(
          :language => attr.fetch('xml:lang', 'en'),
          :version  => attr['version']
        )
      end

      ##
      # @see #on_kaf
      #
      def after_kaf
        @document = @stack.pop
      end

      ##
      # Processes a `<wf>` node.
      #
      # @param [Hash] attr
      #
      def on_wf(attr)
        @stack << AST::Text.new(
          :id        => attr['wid'],
          :sentence  => attr['sent'].to_i,
          :offset    => attr['offset'].to_i,
          :length    => attr['length'].to_i,
          :paragraph => attr['para'].to_i
        )

        @buffer_characters = true
      end

      ##
      # @see #on_wf
      #
      def after_wf
        wf       = @stack.pop
        wf.value = @characters

        current_object.children << wf

        @word_mapping[wf.id] = wf

        reset_character_buffer
      end

      ##
      # Processes a `<term>` node.
      #
      # @param [Hash] attr
      #
      def on_term(attr)
        @attributes << attr

        @buffer_targets = true
      end

      ##
      # Copies the term (and, if present, sentiment) attributes onto every
      # word form referenced by the term.
      #
      # @see #on_term
      #
      def after_term
        attrs, sentiment = @attributes

        @targets.each do |target|
          word = @word_mapping[target]

          # Targets that refer to an unknown word form are skipped.
          next unless word

          word.morphofeat = attrs['morphofeat']
          word.word_type  = attrs['type']
          word.pos        = attrs['pos']

          if sentiment
            word.sentiment_modifier = sentiment['sentiment_modifier']
            word.polarity           = sentiment['polarity']
          end

          # Map the term IDs to the word form node.
          @term_mapping[attrs['tid']] = word
        end

        reset_target_buffer
        reset_attributes_buffer
      end

      ##
      # Processes a `<target>` node.
      #
      # @param [Hash] attr
      #
      def on_target(attr)
        @targets << attr['id'] if @buffer_targets
      end

      ##
      # Processes a `<sentiment>` node.
      #
      # @param [Hash] attr
      #
      def on_sentiment(attr)
        @attributes << attr
      end

      ##
      # Processes a `<opinion>` node.
      #
      # @param [Hash] attr
      #
      def on_opinion(attr)
        @stack << AST::Opinion.new(:id => attr['oid'])
      end

      ##
      # Moves the word nodes of the opinion expression out of the parent node
      # and puts the opinion node in their place.
      #
      # @see #on_opinion
      #
      def after_opinion
        opinion  = @stack.pop
        siblings = current_object.children
        remove   = opinion.children.each_with_object({}) do |node, hash|
          hash[node.id] = true
        end

        # Insert the opinion node before the first node of the expression. If
        # the opinion has no expression nodes (or the first one can not be
        # found in the parent) the opinion is appended instead.
        first       = opinion.children.first
        first_index = first ? siblings.index(first) : nil

        if first_index
          siblings.insert(first_index, opinion)
        else
          siblings << opinion
        end

        # Remove the word nodes from the current object since they have been
        # moved into the opinion node. `reject!` is used since deleting from
        # an Array while iterating over it with `each` skips elements.
        siblings.reject! do |node|
          node.is_a?(AST::Text) && remove.key?(node.id)
        end
      end

      ##
      # @param [Hash] attr
      #
      def on_opinion_holder(attr)
        @buffer_targets = true
      end

      ##
      # @see #on_opinion_holder
      #
      def after_opinion_holder
        current_object.holder.concat(buffered_terms)

        reset_target_buffer
      end

      ##
      # @param [Hash] attr
      #
      def on_opinion_target(attr)
        @buffer_targets = true
      end

      ##
      # @see #on_opinion_target
      #
      def after_opinion_target
        current_object.target.concat(buffered_terms)

        reset_target_buffer
      end

      ##
      # Processes an `<opinion-expression>` node.
      #
      # @param [Hash] attr
      #
      def on_opinion_expression(attr)
        current_object.polarity = attr['polarity']
        current_object.strength = attr['strength'].to_i

        @buffer_targets = true
      end

      ##
      # @see #on_opinion_expression
      #
      def after_opinion_expression
        current_object.children.concat(buffered_terms)

        reset_target_buffer
      end

      ##
      # Processes a `<property>` node.
      #
      # @param [Hash] attr
      #
      def on_property(attr)
        @attributes << attr

        @buffer_targets = true
      end

      ##
      # Assigns the lemma of the property to every referenced term.
      #
      # @see #on_property
      #
      def after_property
        attrs = @attributes.pop

        buffered_terms.each do |word|
          word.property = attrs['lemma']
        end

        reset_attributes_buffer
        reset_target_buffer
      end

      private

      ##
      # Returns the nodes for the currently buffered target IDs. IDs for
      # which no term was registered are left out so that callers never
      # receive `nil` entries.
      #
      # @return [Array<Opener::KafParser::AST::Text>]
      #
      def buffered_terms
        return @targets.map { |target| @term_mapping[target] }.compact
      end

      ##
      # Returns a callback name for the given XML node name.
      #
      # @param [String] name
      # @return [String]
      #
      def callback_name(name)
        return name.gsub(/([^A-Z]+)([A-Z]+)/, '\\1_\\2').downcase
      end

      ##
      # Calls the given method if the parser responds to it, does nothing
      # otherwise.
      #
      # @param [String] name
      # @param [Array] args
      #
      def execute_callback(name, *args)
        send(name, *args) if respond_to?(name)
      end

      ##
      # Converts an Array of attributes into a Hash.
      #
      # @param [Array] attributes
      # @return [Hash]
      #
      def associate_attributes(attributes)
        return attributes.each_with_object({}) do |pair, hash|
          hash[pair[0]] = pair[1]
        end
      end

      ##
      # Returns the object on the top of the stack.
      #
      # @return [Mixed]
      #
      def current_object
        return @stack.last
      end

      ##
      # Resets the character buffer and disables buffering.
      #
      def reset_character_buffer
        @buffer_characters = false
        @characters        = ''
      end

      ##
      # Resets the target buffer and disables buffering.
      #
      def reset_target_buffer
        @buffer_targets = false
        @targets        = []
      end

      ##
      # Resets the attributes buffer.
      #
      def reset_attributes_buffer
        @attributes = []
      end
    end # SaxParser
  end # KafParser
end # Opener
@@ -0,0 +1,5 @@
1
module Opener
  module KafParser
    # The version of the opener-kaf-parser Gem.
    VERSION = '1.0.0'.freeze
  end # KafParser
end # Opener
@@ -0,0 +1,29 @@
1
# Gem specification of opener-kaf-parser. The version is read from the library
# itself so that it only has to be maintained in a single place.
require File.expand_path('../lib/opener/kaf_parser/version', __FILE__)

Gem::Specification.new do |gem|
  gem.name        = 'opener-kaf-parser'
  gem.version     = Opener::KafParser::VERSION
  gem.authors     = ['Yorick Peterse <yorickpeterse@olery.com>']
  gem.summary     = 'A KAF parser written in Ruby.'
  gem.description = gem.summary
  gem.has_rdoc    = 'yard'

  # The text of the bundled LICENSE file is the MIT license.
  gem.license = 'MIT'

  gem.required_ruby_version = '>= 1.9.3'

  # Only regular files are packaged, directories matched by the globs are
  # filtered out.
  gem.files = Dir.glob([
    'doc/**/*',
    'lib/**/*',
    'LICENSE',
    '*.gemspec',
    'README.md'
  ]).select { |file| File.file?(file) }

  gem.add_dependency 'nokogiri'
  gem.add_dependency 'builder'

  gem.add_development_dependency 'rspec'
  gem.add_development_dependency 'rake'
  gem.add_development_dependency 'simplecov'
  gem.add_development_dependency 'yard'
  gem.add_development_dependency 'redcarpet', ['>= 2.0']
end
metadata ADDED
@@ -0,0 +1,155 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: opener-kaf-parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Yorick Peterse <yorickpeterse@olery.com>
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-05-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: builder
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: simplecov
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: yard
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: redcarpet
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '2.0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '2.0'
111
+ description: A KAF parser written in Ruby.
112
+ email:
113
+ executables: []
114
+ extensions: []
115
+ extra_rdoc_files: []
116
+ files:
117
+ - LICENSE
118
+ - README.md
119
+ - doc/css/common.css
120
+ - lib/opener/kaf_parser.rb
121
+ - lib/opener/kaf_parser/ast/base.rb
122
+ - lib/opener/kaf_parser/ast/document.rb
123
+ - lib/opener/kaf_parser/ast/opinion.rb
124
+ - lib/opener/kaf_parser/ast/text.rb
125
+ - lib/opener/kaf_parser/parser.rb
126
+ - lib/opener/kaf_parser/presenter/html.rb
127
+ - lib/opener/kaf_parser/presenter/text.rb
128
+ - lib/opener/kaf_parser/sax_parser.rb
129
+ - lib/opener/kaf_parser/version.rb
130
+ - opener-kaf-parser.gemspec
131
+ homepage:
132
+ licenses: []
133
+ metadata: {}
134
+ post_install_message:
135
+ rdoc_options: []
136
+ require_paths:
137
+ - lib
138
+ required_ruby_version: !ruby/object:Gem::Requirement
139
+ requirements:
140
+ - - ">="
141
+ - !ruby/object:Gem::Version
142
+ version: 1.9.3
143
+ required_rubygems_version: !ruby/object:Gem::Requirement
144
+ requirements:
145
+ - - ">="
146
+ - !ruby/object:Gem::Version
147
+ version: '0'
148
+ requirements: []
149
+ rubyforge_project:
150
+ rubygems_version: 2.2.2
151
+ signing_key:
152
+ specification_version: 4
153
+ summary: A KAF parser written in Ruby.
154
+ test_files: []
155
+ has_rdoc: yard