standoff 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +15 -0
  2. data/lib/standoff.rb +136 -0
  3. metadata +45 -0
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ YTJhNzY2ZTE5YzNiYThkN2U3NjVmZjI2MzgwMDkzODRhNDllMWYzMA==
5
+ data.tar.gz: !binary |-
6
+ NGIxMzIyMjdjNDkxNTRhMDAwNmM3ZTcwNmE0MjA2ZTMwYTcyODk0Yw==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ OGIzYTBjOWZhZmQ2MmVhZGJkNDhhM2NmYTFkYWZmNjhhYTU4YzZiNzQ3OTQ5
10
+ NzA0MDVhYzAwMzkwYmU1NDg3NzY4NjMxNGVlMmQ0NDU0NDFhZWI1ZWZlNjY3
11
+ YmVkODE3YTkyNDUyMjgzYzdjZDdmM2RjNzQ4ZjI3ZWJmMTkwMDM=
12
+ data.tar.gz: !binary |-
13
+ OWJjMTczNWUxZWI4OTExY2Y0ZjM3NmYyNWZmNjJlNDhmOGNjNTkwY2EyY2My
14
+ YjUyY2ZiNzU1N2Q0Y2RiYTQ3MTMxOTJlYTRjNGViY2QzY2QzNDg2MzNkNzcy
15
+ Yzg1YjVkMGY5YmUzMGU2NWVmZWZlOWRkYmZhYjhiZDBkMWNiNDk=
data/lib/standoff.rb ADDED
@@ -0,0 +1,136 @@
1
+ =begin
2
+ Copyright 2015 The MITRE Corporation
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ =end
16
+
17
+ require 'rexml/document'
18
+
19
+ module Standoff
20
# A piece of text (the "signal") together with a list of standoff span tags
# that annotate character ranges of it. Tag objects are expected to respond
# to #name, #attributes, #start, #end and #<=>.
class AnnotatedString
  # The reader for tags is defined by hand below (it takes an optional
  # filter), so only the writer is generated here.
  attr_accessor :signal
  attr_writer :tags

  # Builds an annotated string.
  #
  # options[:signal] - the text being annotated (defaults to an empty string)
  # options[:tags]   - array of tag objects (defaults to an empty array)
  #
  # Each option is honoured on its own, so supplying only one of them no
  # longer discards it and leaves the object without usable state.
  def initialize(options = {})
    @signal = options[:signal] || ''.dup
    @tags = options[:tags] || []
  end

  # Without an argument, returns the full tag list.
  # With an argument, returns only the tags whose name equals +name+.
  def tags(name = nil)
    return @tags unless name

    @tags.select { |tag| tag.name == name }
  end

  # Returns the signal as a string with the tags interpolated as inline XML.
  # The signal itself is not modified; a copy is annotated.
  def to_s
    xml = @signal.dup
    # Start offset of the most recently inserted tag (initially the end of
    # the text). Anything ending after it would overlap an inserted tag.
    leftmost_start = xml.length

    # Insert tags starting from the end of the string, so the start and end
    # indices of the tags still to be inserted stay valid.
    @tags.sort.reverse_each do |tag|
      # Overlapping tags are allowed on the object, but they have to be
      # filtered out when serializing to inline markup.
      next if tag.end > leftmost_start

      leftmost_start = tag.start
      insert_tag(xml, tag)
    end

    xml
  end

  # Custom inspect listing every instance variable; the original author notes
  # that without it the to_s override would take over the default inspect.
  def inspect
    vars = instance_variables.map { |v| "#{v}=#{instance_variable_get(v).inspect}" }.join(', ')
    "<#{self.class}: #{vars}>"
  end

  # Inserts the inline start and end markup for +tag+ into +text+ (mutating
  # it) and returns +text+. A tag without attributes (nil) is rendered as a
  # bare element instead of raising.
  def insert_tag(text, tag)
    attribute_markup = (tag.attributes || {}).map { |k, v| " #{k}='#{v}'" }.join
    # The end tag goes in first so that tag.start is still a valid offset.
    text.insert(tag.end, '</' + tag.name + '>')
    text.insert(tag.start, '<' + tag.name + attribute_markup + '>')
    text
  end

  # Returns the tag that sorts immediately before +tag+, or nil if +tag+ is
  # first. Raises RuntimeError if +tag+ is not a member of the tag list.
  def previous_tag(tag)
    sorted, index = locate(tag, 'previous_tag')
    index > 0 ? sorted[index - 1] : nil
  end

  # Returns the tag that sorts immediately after +tag+, or nil if +tag+ is
  # last. Raises RuntimeError if +tag+ is not a member of the tag list.
  def next_tag(tag)
    sorted, index = locate(tag, 'next_tag')
    index < sorted.length - 1 ? sorted[index + 1] : nil
  end

  private

  # Sorts the tags and finds the position of +tag+ among them. Returns
  # [sorted_tags, index]. +method_name+ is only used to build the error
  # message, so each caller reports its own name.
  # The tags are re-sorted on every call because @tags is not kept sorted.
  def locate(tag, method_name)
    sorted = @tags.sort
    index = sorted.index(tag)
    if index.nil?
      raise "error in Standoff::AnnotatedString##{method_name}: argument should be a member of self.tags"
    end

    [sorted, index]
  end
end
87
+
88
# Converts inline-tagged text into an AnnotatedString by pulling events from
# REXML's BaseParser. Only flat markup is supported: an element may contain
# a single run of text or nothing at all; nested elements raise.
class XMLParser
  # source - whatever REXML::Parsers::BaseParser.new accepts.
  def initialize(source)
    @parser = REXML::Parsers::BaseParser.new(source)
    @signal = ''.dup
    @tags = []
  end

  # Consumes the parser's events and returns an AnnotatedString whose signal
  # is the concatenated text and whose tags record the span of each element.
  # Events other than :text and :start_element are ignored at this level.
  #
  # Raises RuntimeError when an element contains something other than plain
  # text, or when its end tag does not match its start tag.
  def parse
    while @parser.has_next?
      # A pull event is an array: [:text, text] or
      # [:start_element, name, attributes]. All three slots are captured
      # here; taking only the first two would lose the attributes hash.
      event_type, first, second = @parser.pull
      case event_type
      when :text
        # Append in place rather than rebuilding the string on every event.
        @signal << first
      when :start_element
        @tags << read_element(first, second)
      end
    end
    AnnotatedString.new(:signal => @signal, :tags => @tags)
  end

  private

  # Reads the remainder of an element whose start tag was just pulled,
  # appends its text to the signal and returns the Tag covering it.
  # An element with no text yields a zero-length tag at the current offset.
  def read_element(name, attributes)
    event_type, value = @parser.pull
    content = ''
    case event_type
    when :text
      content = value
      event_type, value = @parser.pull
      raise ":end_element expected, #{event_type.inspect}.found" if event_type != :end_element
    when :end_element
      # Empty element: there is no text between the start and end tags.
    else
      raise ":text expected, #{event_type.inspect}.found"
    end
    raise "mismatched tag: \"#{value}\" end_element following \"#{name}\" start_element" if value != name

    # Offsets are measured in the signal, i.e. with the markup removed.
    start = @signal.length
    @signal << content
    Tag.new(:name => name, :attributes => attributes, :content => content,
            :start => start, :end => start + content.length)
  end
end
117
+
118
# A named span annotation over a signal string.
#
# start and end are offsets into the signal. NOTE(review): end is treated as
# exclusive here, matching how the parser in this file computes it
# (start + text length); confirm for tags built by other callers.
class Tag
  attr_accessor :name, :attributes, :content, :start, :end

  # options[:name]       - element name (string)
  # options[:attributes] - attribute hash
  # options[:content]    - the text covered by the tag (string)
  # options[:start]      - offset of the first covered character (numeric)
  # options[:end]        - offset just past the last covered character (numeric)
  def initialize(options = {})
    @name = options[:name]
    @attributes = options[:attributes]
    @content = options[:content]
    @start = options[:start]
    @end = options[:end]
  end

  # Orders tags by end offset only. Comparable is deliberately not mixed in:
  # it would redefine == in terms of this comparison and make distinct tags
  # that share an end offset compare equal.
  def <=>(other)
    @end <=> other.end
  end

  # True when this tag and +other_tag+ share at least one character of the
  # signal. Because end offsets are exclusive, tags that merely touch
  # (one ends where the other starts) do not overlap, and a zero-length tag
  # only overlaps a span it lies strictly inside. Uses a direct interval
  # comparison, so no arrays are built and non-integer offsets are accepted.
  def overlap(other_tag)
    @start < other_tag.end && other_tag.start < @end
  end
end
134
+
135
+ end
136
+
metadata ADDED
@@ -0,0 +1,45 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: standoff
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.5
5
+ platform: ruby
6
+ authors:
7
+ - David Tresner-Kirsch
8
+ - Dan Noar
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-12-01 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: ! 'Standoff Annotation library: provides functionality for adding span
15
+ tags to strings. Allows export to inline XML.'
16
+ email: dwkirsch@gmail.com
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - lib/standoff.rb
22
+ homepage:
23
+ licenses: []
24
+ metadata: {}
25
+ post_install_message:
26
+ rdoc_options: []
27
+ require_paths:
28
+ - lib
29
+ required_ruby_version: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ required_rubygems_version: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - ! '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ requirements: []
40
+ rubyforge_project:
41
+ rubygems_version: 2.4.3
42
+ signing_key:
43
+ specification_version: 4
44
+ summary: Standoff
45
+ test_files: []