standoff 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +15 -0
  2. data/lib/standoff.rb +136 -0
  3. metadata +45 -0
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ YTJhNzY2ZTE5YzNiYThkN2U3NjVmZjI2MzgwMDkzODRhNDllMWYzMA==
5
+ data.tar.gz: !binary |-
6
+ NGIxMzIyMjdjNDkxNTRhMDAwNmM3ZTcwNmE0MjA2ZTMwYTcyODk0Yw==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ OGIzYTBjOWZhZmQ2MmVhZGJkNDhhM2NmYTFkYWZmNjhhYTU4YzZiNzQ3OTQ5
10
+ NzA0MDVhYzAwMzkwYmU1NDg3NzY4NjMxNGVlMmQ0NDU0NDFhZWI1ZWZlNjY3
11
+ YmVkODE3YTkyNDUyMjgzYzdjZDdmM2RjNzQ4ZjI3ZWJmMTkwMDM=
12
+ data.tar.gz: !binary |-
13
+ OWJjMTczNWUxZWI4OTExY2Y0ZjM3NmYyNWZmNjJlNDhmOGNjNTkwY2EyY2My
14
+ YjUyY2ZiNzU1N2Q0Y2RiYTQ3MTMxOTJlYTRjNGViY2QzY2QzNDg2MzNkNzcy
15
+ Yzg1YjVkMGY5YmUzMGU2NWVmZWZlOWRkYmZhYjhiZDBkMWNiNDk=
data/lib/standoff.rb ADDED
@@ -0,0 +1,136 @@
1
+ =begin
2
+ Copyright 2015 The MITRE Corporation
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ =end
16
+
17
+ require 'rexml/document'
18
+
19
+ module Standoff
20
# A plain-text signal plus a list of stand-off tags that point into it.
#
# Tag objects are duck-typed: they must respond to +name+, +attributes+,
# +start+, +end+ and +<=>+. +start+/+end+ are character offsets into the
# signal; +end+ is used as the insertion point of the closing tag, i.e. it is
# the offset just past the last annotated character.
class AnnotatedString
  attr_accessor :signal
  # The reader for +tags+ is defined by hand below because it takes an
  # optional filter argument; only the writer is generated.
  attr_writer :tags

  # options - Hash with optional keys:
  #           :signal - the text being annotated (defaults to an empty string)
  #           :tags   - Array of tag objects (defaults to an empty array)
  #
  # Each key is honoured independently, so passing only :signal no longer
  # throws the signal away, and a bare AnnotatedString.new is usable.
  def initialize(options = {})
    @signal = options[:signal] || ''.dup
    @tags = options[:tags] || []
  end

  # Returns every tag when called without an argument, otherwise only the
  # tags whose name equals +name+.
  def tags(name = nil)
    return @tags unless name

    @tags.select { |tag| tag.name == name }
  end

  # Returns the signal with the tags interpolated as inline XML.
  #
  # Stand-off tags may overlap, inline XML cannot express that, so tags that
  # overlap (or enclose) an already-written tag are left out. The signal
  # itself is not modified.
  def to_s
    xml = @signal.dup
    oldbegin = xml.length

    # Walk from the highest end offset to the lowest: inserting text at a
    # position never shifts the offsets of tags that lie before it.
    @tags.sort.reverse_each do |tag|
      # Skip tags reaching past the start of the tag written last.
      next if tag.end > oldbegin

      oldbegin = tag.start
      insert_tag(xml, tag)
    end

    xml
  end

  # Re-defined so that the custom to_s above is not used for inspection.
  def inspect
    vars = instance_variables.map { |v| "#{v}=#{instance_variable_get(v).inspect}" }.join(", ")
    "<#{self.class}: #{vars}>"
  end

  # Inserts the opening and closing form of +tag+ into +text+ (mutating it)
  # and returns +text+.
  #
  # Attribute values are written inside single quotes, so any apostrophe in a
  # value is emitted as &apos; to keep the output well-formed. Other
  # characters are written unchanged. A tag without attributes (nil) is
  # treated as having none.
  def insert_tag(text, tag)
    attribute_form = (tag.attributes || {}).map do |key, value|
      " #{key}='#{value.to_s.gsub("'", '&apos;')}'"
    end.join

    # The closing tag goes in first so that tag.start is still valid afterwards.
    text.insert(tag.end, "</#{tag.name}>")
    text.insert(tag.start, "<#{tag.name}#{attribute_form}>")
    text
  end

  # Returns the tag that precedes +tag+ in sorted order, or nil when +tag+ is
  # the first one. Raises RuntimeError if +tag+ is not one of self.tags.
  def previous_tag(tag)
    # The tags are sorted on every call; @tags itself is kept in caller order.
    ordered = @tags.sort
    index = ordered.index(tag)
    raise "error in Standoff::AnnotatedString#previous_tag: argument should be a member of self.tags" if index.nil?

    index > 0 ? ordered[index - 1] : nil
  end

  # Returns the tag that follows +tag+ in sorted order, or nil when +tag+ is
  # the last one. Raises RuntimeError if +tag+ is not one of self.tags.
  def next_tag(tag)
    # The tags are sorted on every call; @tags itself is kept in caller order.
    ordered = @tags.sort
    index = ordered.index(tag)
    raise "error in Standoff::AnnotatedString#next_tag: argument should be a member of self.tags" if index.nil?

    index < ordered.length - 1 ? ordered[index + 1] : nil
  end
end
87
+
88
# Converts inline-XML-annotated text into an AnnotatedString: the text
# content becomes the signal and each element becomes a Tag whose start/end
# are character offsets into that signal.
#
# Only flat markup is supported: an element may contain a single run of text
# (or nothing); nested elements raise.
#
# NOTE(review): recent REXML releases appear to reject text outside a single
# root element — confirm which inputs the installed REXML version accepts.
class XMLParser
  # source - anything REXML::Parsers::BaseParser.new accepts.
  def initialize(source)
    @parser = REXML::Parsers::BaseParser.new(source)
    @signal = ''.dup # mutable buffer, appended to in place while parsing
    @tags = []
  end

  # Consumes the parser's events and returns an AnnotatedString built from
  # the accumulated signal and tags.
  #
  # Raises RuntimeError when an element contains anything other than plain
  # text, or when an end tag does not match its start tag.
  def parse
    while @parser.has_next?
      # Events are arrays: [type, *payload]. A :start_element event carries
      # both the name and the attributes, so the whole array must be kept.
      event = @parser.pull
      case event[0]
      when :text
        @signal << event[1]
      when :start_element
        @tags << read_element(event[1], event[2])
      end
      # every other event type is ignored
    end
    AnnotatedString.new(:signal => @signal, :tags => @tags)
  end

  private

  # Reads the remainder of an element whose start tag was just pulled,
  # appends its text to the signal and returns the resulting Tag.
  def read_element(name, attributes)
    tag = Tag.new(:name => name, :attributes => attributes)
    content = ''
    snip_type, snip = @parser.pull

    # An immediate end tag means an empty element: a zero-length tag.
    if snip_type != :end_element
      raise ":text expected, #{snip_type.inspect}.found" if snip_type != :text
      content = snip
      snip_type, snip = @parser.pull
    end

    tag.start = @signal.length
    tag.end = @signal.length + content.length
    tag.content = content
    @signal << content

    raise ":end_element expected, #{snip_type.inspect}.found" if snip_type != :end_element
    raise "mismatched tag: \"#{snip}\" end_element following \"#{name}\" start_element" if snip != name

    tag
  end
end
117
+
118
# A named span over a signal, with optional attributes and content.
#
# Comparable is deliberately not mixed in: +<=>+ only looks at +end+, and
# mixing it in would make two distinct tags with the same end compare as ==.
class Tag
  attr_accessor :name, :attributes, :content, :start, :end

  # options - Hash with optional keys:
  #           :name       - String
  #           :attributes - Hash (defaults to an empty hash)
  #           :content    - String
  #           :start      - Numeric offset
  #           :end        - Numeric offset
  def initialize(options = {})
    @name = options[:name]
    @attributes = options[:attributes] || {}
    @content = options[:content]
    @start = options[:start]
    @end = options[:end]
  end

  # Orders tags by their end offset only.
  def <=>(other)
    @end <=> other.end
  end

  # Returns true when the inclusive ranges start..end of the two tags share
  # at least one position, false otherwise.
  #
  # Both bounds are treated as inclusive, so a tag ending at offset n and a
  # tag starting at offset n count as overlapping.
  # NOTE(review): confirm this is intended for tags whose +end+ is the offset
  # just past their last character.
  #
  # Only the bounds are compared; the ranges are not expanded into arrays,
  # so the cost does not depend on how long the spans are.
  def overlap(other_tag)
    # A span whose start lies after its end is empty and overlaps nothing.
    return false if @start > @end || other_tag.start > other_tag.end

    @start <= other_tag.end && other_tag.start <= @end
  end
end
134
+
135
+ end
136
+
metadata ADDED
@@ -0,0 +1,45 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: standoff
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.5
5
+ platform: ruby
6
+ authors:
7
+ - David Tresner-Kirsch
8
+ - Dan Noar
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-12-01 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: ! 'Standoff Annotation library: provides functionality for adding span
15
+ tags to strings. Allows export to inline XML.'
16
+ email: dwkirsch@gmail.com
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - lib/standoff.rb
22
+ homepage:
23
+ licenses: []
24
+ metadata: {}
25
+ post_install_message:
26
+ rdoc_options: []
27
+ require_paths:
28
+ - lib
29
+ required_ruby_version: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ required_rubygems_version: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - ! '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ requirements: []
40
+ rubyforge_project:
41
+ rubygems_version: 2.4.3
42
+ signing_key:
43
+ specification_version: 4
44
+ summary: Standoff
45
+ test_files: []