standoff 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/lib/standoff.rb +136 -0
- metadata +45 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
YTJhNzY2ZTE5YzNiYThkN2U3NjVmZjI2MzgwMDkzODRhNDllMWYzMA==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
NGIxMzIyMjdjNDkxNTRhMDAwNmM3ZTcwNmE0MjA2ZTMwYTcyODk0Yw==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
OGIzYTBjOWZhZmQ2MmVhZGJkNDhhM2NmYTFkYWZmNjhhYTU4YzZiNzQ3OTQ5
|
10
|
+
NzA0MDVhYzAwMzkwYmU1NDg3NzY4NjMxNGVlMmQ0NDU0NDFhZWI1ZWZlNjY3
|
11
|
+
YmVkODE3YTkyNDUyMjgzYzdjZDdmM2RjNzQ4ZjI3ZWJmMTkwMDM=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
OWJjMTczNWUxZWI4OTExY2Y0ZjM3NmYyNWZmNjJlNDhmOGNjNTkwY2EyY2My
|
14
|
+
YjUyY2ZiNzU1N2Q0Y2RiYTQ3MTMxOTJlYTRjNGViY2QzY2QzNDg2MzNkNzcy
|
15
|
+
Yzg1YjVkMGY5YmUzMGU2NWVmZWZlOWRkYmZhYjhiZDBkMWNiNDk=
|
data/lib/standoff.rb
ADDED
@@ -0,0 +1,136 @@
|
|
1
|
+
=begin
|
2
|
+
Copyright 2015 The MITRE Corporation
|
3
|
+
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
you may not use this file except in compliance with the License.
|
6
|
+
You may obtain a copy of the License at
|
7
|
+
|
8
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
See the License for the specific language governing permissions and
|
14
|
+
limitations under the License.
|
15
|
+
=end
|
16
|
+
|
17
|
+
require 'rexml/document'
|
18
|
+
|
19
|
+
module Standoff
|
20
|
+
# A signal string plus a list of standoff tags that reference character
# offsets within it. Tags are duck-typed: they must respond to #name,
# #attributes, #start, #end and #<=>.
class AnnotatedString
  attr_accessor :signal
  # The reader for tags is defined explicitly below because it accepts an
  # optional name filter.
  attr_writer :tags

  # options - Hash with optional keys:
  #   :signal - the underlying text (defaults to an empty string)
  #   :tags   - Array of tag objects (defaults to an empty array)
  #
  # Each option is honoured independently, so passing only :signal no
  # longer leaves the object without a signal or tag list.
  def initialize(options = {})
    @signal = options[:signal] || ''.dup
    @tags = options[:tags] || []
  end

  # Without an argument, behaves like a plain attribute reader.
  # With a name, returns only the tags whose #name equals it.
  def tags(name = nil)
    return @tags unless name

    @tags.select { |tag| tag.name == name }
  end

  # Returns a copy of the signal with the tags interpolated as inline XML.
  # Tags are inserted from the end of the string backwards so that the
  # offsets of tags not yet inserted stay valid. A tag that ends after the
  # start of the most recently inserted tag (i.e. one that overlaps or
  # encloses it) is skipped, as is any tag ending past the end of the signal.
  #
  # NOTE(review): neither the signal text nor attribute values are
  # XML-escaped here; confirm that callers only supply pre-escaped content.
  def to_s
    xml = @signal.dup
    leftmost_start = xml.length

    @tags.sort.reverse_each do |tag|
      next if tag.end > leftmost_start

      leftmost_start = tag.start
      insert_tag(xml, tag)
    end

    xml
  end

  # Redefined because a custom #to_s would otherwise be used by the default
  # inspect; lists every instance variable with its inspected value.
  def inspect
    vars = instance_variables.map { |ivar| "#{ivar}=#{instance_variable_get(ivar).inspect}" }.join(", ")
    "<#{self.class}: #{vars}>"
  end

  # Inserts the opening and closing markup for tag into text (mutating it)
  # and returns text. The closing tag goes in first so that tag.start is
  # still a valid offset when the opening tag is inserted. A tag whose
  # attributes are nil is rendered without attributes.
  def insert_tag(text, tag)
    attribute_text = (tag.attributes || {}).map { |key, value| " #{key}='#{value}'" }.join
    text.insert(tag.end, "</#{tag.name}>")
    text.insert(tag.start, "<#{tag.name}#{attribute_text}>")
    text
  end

  # Returns the tag that sorts immediately before tag, or nil when tag is
  # first. Raises RuntimeError when tag is not a member of self.tags.
  def previous_tag(tag)
    ordered, position = locate_in_sorted_tags(tag, 'previous_tag')
    position > 0 ? ordered[position - 1] : nil
  end

  # Returns the tag that sorts immediately after tag, or nil when tag is
  # last. Raises RuntimeError when tag is not a member of self.tags.
  def next_tag(tag)
    ordered, position = locate_in_sorted_tags(tag, 'next_tag')
    position < ordered.length - 1 ? ordered[position + 1] : nil
  end

  private

  # Sorts the tags and finds the position of tag among them. Returns
  # [sorted_tags, index]; raises RuntimeError naming the calling method
  # when tag is not found.
  # The tags are re-sorted on every call because @tags is not kept sorted.
  def locate_in_sorted_tags(tag, method_name)
    ordered = @tags.sort
    position = ordered.index(tag)
    if position.nil?
      raise "error in Standoff::AnnotatedString##{method_name}: argument should be a member of self.tags"
    end

    [ordered, position]
  end
end
|
87
|
+
|
88
|
+
# Converts inline XML into an AnnotatedString by pulling events from a
# REXML::Parsers::BaseParser. Only flat markup is supported: each element
# may contain a single run of text (or nothing); nested elements raise.
class XMLParser
  # source - whatever REXML::Parsers::BaseParser.new accepts.
  def initialize(source)
    @parser = REXML::Parsers::BaseParser.new(source)
    @signal = ''.dup # mutable accumulator for the tag-free text
    @tags = []
  end

  # Consumes all parser events and returns an AnnotatedString built from
  # the accumulated text and tags. Events other than :text and
  # :start_element at the top level are ignored.
  #
  # Raises RuntimeError when an element contains anything other than a
  # single text run, or when the closing tag name does not match.
  def parse
    while @parser.has_next?
      # Pull events are flat arrays such as [:start_element, name, attrs];
      # splat the remainder so the attributes are not discarded.
      event_type, *payload = @parser.pull
      case event_type
      when :text
        @signal << payload[0]
      when :start_element
        @tags << read_element(payload[0], payload[1])
      end
    end
    AnnotatedString.new(:signal => @signal, :tags => @tags)
  end

  private

  # Reads the remainder of an element whose start event has just been
  # pulled, appends its text to the signal and returns the resulting Tag.
  # An element with no text yields a zero-length tag with empty content.
  def read_element(name, attributes)
    tag = Tag.new(:name => name, :attributes => attributes)

    event_type, *payload = @parser.pull
    content = ''
    if event_type == :text
      content = payload[0]
      event_type, *payload = @parser.pull
    elsif event_type != :end_element
      raise ":text expected, #{event_type.inspect}.found"
    end

    raise ":end_element expected, #{event_type.inspect}.found" if event_type != :end_element

    closing_name = payload[0]
    if closing_name != name
      raise "mismatched tag: \"#{closing_name}\" end_element following \"#{name}\" start_element"
    end

    # Offsets are measured against the signal before this element's text
    # is appended; the end offset is one past the last character.
    tag.start = @signal.length
    tag.end = @signal.length + content.length
    tag.content = content
    @signal << content
    tag
  end
end
|
117
|
+
|
118
|
+
# A named span with optional attributes and content, positioned by
# start and end offsets.
class Tag
  attr_accessor :name, :attributes, :content, :start, :end

  # options - Hash with optional keys :name (string), :attributes (hash),
  #           :content (string), :start (numeric) and :end (numeric).
  #           Missing keys leave the corresponding attribute nil.
  def initialize(options = {})
    @name = options[:name]
    @attributes = options[:attributes]
    @content = options[:content]
    @start = options[:start]
    @end = options[:end]
  end

  # Orders tags by end offset. Ties are broken by start offset so that
  # sorting is deterministic (Array#sort is not stable); when the start
  # offsets cannot be compared the tags are treated as equal, which is
  # what comparing on end offset alone would have produced.
  # Returns nil when the end offsets are not comparable.
  def <=>(other)
    by_end = @end <=> other.end
    return by_end unless by_end == 0

    (@start <=> other.start) || 0
  end

  # Returns true when this tag's span shares at least one position with
  # other_tag's span. Both bounds are treated as inclusive, so spans that
  # merely touch (one ends where the other starts) count as overlapping.
  # A span whose start is greater than its end is empty and overlaps nothing.
  # Uses bound comparisons rather than enumerating every offset, so it
  # also works for non-Integer numeric bounds.
  def overlap(other_tag)
    return false if @start > @end || other_tag.start > other_tag.end

    @start <= other_tag.end && other_tag.start <= @end
  end
end
|
134
|
+
|
135
|
+
end
|
136
|
+
|
metadata
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: standoff
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.5
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- David Tresner-Kirsch
|
8
|
+
- Dan Noar
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2014-12-01 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: ! 'Standoff Annotation library: provides functionality for adding span
|
15
|
+
tags to strings. Allows export to inline XML.'
|
16
|
+
email: dwkirsch@gmail.com
|
17
|
+
executables: []
|
18
|
+
extensions: []
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- lib/standoff.rb
|
22
|
+
homepage:
|
23
|
+
licenses: []
|
24
|
+
metadata: {}
|
25
|
+
post_install_message:
|
26
|
+
rdoc_options: []
|
27
|
+
require_paths:
|
28
|
+
- lib
|
29
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ! '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
35
|
+
requirements:
|
36
|
+
- - ! '>='
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '0'
|
39
|
+
requirements: []
|
40
|
+
rubyforge_project:
|
41
|
+
rubygems_version: 2.4.3
|
42
|
+
signing_key:
|
43
|
+
specification_version: 4
|
44
|
+
summary: Standoff
|
45
|
+
test_files: []
|