slaw 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: a0da8d0d88cfd753f8ef854937248c8881440390
4
+ data.tar.gz: 0762a47f6b0bac65d4b3fe829bc2a4997f3b23f9
5
+ SHA512:
6
+ metadata.gz: 4ab788b276cd06d1735bb859a1f7fa08820f9bf65b2c6282df65ea1fd2303cbd5b42433366a3a0b2a7a20dbe227e78cc6b5caa2ab3b5cb988d6c2a27097f05ce
7
+ data.tar.gz: 3882e5a3b292dfcd9adecb0b2077f9cb21a5d5e76e90402a09c77f146a1ec3acb0272649daf03cc64556864bfd5d8a921d873cbb20534c626542894a02218372
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
# Gem source used to resolve dependencies.
source 'https://rubygems.org'
# Ruby version declared for working on this gem.
ruby '2.1.1'

# Specify your gem's dependencies in slaw.gemspec
gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Greg Kempe
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,31 @@
1
+ # Slaw
2
+
3
+ Slaw is a lightweight library for rendering and generating Akoma Ntoso acts from plain text and PDF documents.
4
+ It is used to power [openbylaws.org.za](http://openbylaws.org.za).
5
+
6
+ ## Installation
7
+
8
+ Add this line to your application's Gemfile:
9
+
10
+ gem 'slaw'
11
+
12
+ And then execute:
13
+
14
+ $ bundle
15
+
16
+ Or install it yourself as:
17
+
18
+ $ gem install slaw
19
+
20
+ ## Usage
21
+
22
+ TODO: Write usage instructions here
23
+
24
+ ## Contributing
25
+
26
+ 1. Fork it ( http://github.com/longhotsummer/slaw/fork )
27
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
28
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
29
+ 4. Push to the branch (`git push origin my-new-feature`)
30
+ 5. Create new Pull Request
31
+
data/Rakefile ADDED
@@ -0,0 +1,7 @@
1
# Load the rake tasks that Bundler ships for gems.
require "bundler/gem_tasks"

# Define the :spec task when RSpec can be loaded. A LoadError (e.g. RSpec
# is not installed) is deliberately ignored so the other tasks still work.
begin
  require 'rspec/core/rake_task'
  RSpec::Core::RakeTask.new(:spec)
rescue LoadError
end
data/lib/slaw/act.rb ADDED
@@ -0,0 +1,243 @@
1
require 'time' # Time.parse (used by Act#repealed_on) lives in the stdlib 'time' extension

module Slaw
  # Wraps an AkomaNtoso 2.0 XML document describing an Act.
  #
  # An instance holds the parsed document (+doc+), its +meta+ and +body+
  # elements, and the identifying details extracted from the FRBR work URI.
  class Act
    include Slaw::Namespace

    # Allow us to jump from the XML document for an act to the
    # Act instance itself. Every parsed document is registered here and is
    # never removed.
    @@acts = {}

    attr_accessor :doc, :meta, :body, :num, :year, :id_uri
    attr_accessor :filename, :mtime

    # Look up the Act instance that parsed the document owning +node+,
    # or nil if that document was not parsed by an Act.
    def self.for_node(node)
      @@acts[node.document]
    end

    # Create a new instance, loading +filename+ if one is given.
    def initialize(filename=nil)
      self.load(filename) if filename
    end

    # Load the XML from +filename+, remembering the file name and its
    # modification time.
    def load(filename)
      @filename = filename
      @mtime = File.mtime(@filename)

      File.open(filename) { |f| parse(f) }
    end

    # Parse the XML contained in the file-like object +io+, register the
    # document so that Act.for_node can find this instance, and extract
    # the identifying details.
    def parse(io)
      @doc = Nokogiri::XML(io)
      @meta = @doc.at_xpath('/a:akomaNtoso/a:act/a:meta', a: NS)
      @body = @doc.at_xpath('/a:akomaNtoso/a:act/a:body', a: NS)

      @@acts[@doc] = self

      extract_id
    end

    # Pull the country, number and year out of the FRBR work URI,
    # which is split on '/' as: (empty)/country/type/date/number
    def extract_id
      @id_uri = @meta.at_xpath('./a:identification/a:FRBRWork/a:FRBRuri', a: NS)['value']
      _empty, @country, _type, date, @num = @id_uri.split('/')

      # yyyy-mm-dd
      @year = date.split('-', 2)[0]
    end

    # The short title from the FRBRalias element, or "Act <num> of <year>"
    # when there is no alias. The result is memoised.
    def short_title
      unless @short_title
        node = @meta.at_xpath('./a:identification/a:FRBRWork/a:FRBRalias', a: NS)
        @short_title = node ? node['value'] : "Act #{num} of #{year}"
      end

      @short_title
    end

    # Path component of the URL for this act.
    def url_path
      "/#{@country}/acts/#{@year}/#{@num}/"
    end

    # Base file name for this act.
    def url_file
      "act-#{@year}-#{@num}"
    end

    # Has this act been amended? True unless the act element is marked
    # as containing the original version.
    def amended?
      @doc.at_xpath('/a:akomaNtoso/a:act', a: NS)['contains'] != 'originalVersion'
    end

    # a list of LifecycleEvent objects for amendment events, in date order
    def amendment_events
      events = @meta.xpath('./a:lifecycle/a:eventRef[@type="amendment"]', a: NS).map do |event|
        LifecycleEvent.new(event)
      end

      events.sort_by { |e| e.date }
    end

    # Mark this act as being amended by another act, either +act+
    # or the details in +opts+:
    #
    #   :uri: uri of the amending act
    #   :title: title of the amending act
    #   :date: date of the amendment
    #
    # Values given in +opts+ take precedence over those read from +act+.
    # The caller's +opts+ hash is not modified.
    #
    # It is assumed that there can be only one amendment event on a particular
    # date. An existing amendment on this date is overwritten.
    #
    # Raises ArgumentError if no amendment date can be determined.
    def amended_by!(act, opts={})
      # work on a copy so that filling in defaults doesn't leak to the caller
      opts = opts.dup

      if act
        publication = act.publication

        opts[:uri] ||= act.id_uri
        opts[:title] ||= act.short_title
        opts[:date] ||= publication && publication['date']
      end

      raise ArgumentError, "an amendment date is required" unless opts[:date]

      date = opts[:date].to_s
      source_id = "amendment-#{date}"

      # assume we now hold a single version and not the original version
      @doc.at_xpath('/a:akomaNtoso/a:act', a: NS)['contains'] = 'singleVersion'

      # add the lifecycle event, creating the lifecycle element directly
      # after the publication element if there isn't one yet
      lifecycle = @meta.at_xpath('./a:lifecycle', a: NS)
      unless lifecycle
        lifecycle = @doc.create_element('lifecycle', source: "#this")
        @meta.at_xpath('./a:publication', a: NS).after(lifecycle)
      end

      event = lifecycle.at_xpath(%{./a:eventRef[@date="#{date}"][@type="amendment"]}, a: NS)
      if event
        # clear up old event: its source attribute is "#<id>", which doubles
        # as a CSS id selector for the reference it points at
        src = @doc.at_css(event['source'])
        src.remove if src
      else
        # new event
        event = @doc.create_element('eventRef', type: 'amendment')
        lifecycle << event
      end

      event['date'] = date
      event['id'] = "amendment-event-#{date}"
      event['source'] = '#' + source_id

      # add reference
      ref = @doc.create_element('passiveRef',
                                id: source_id,
                                href: opts[:uri],
                                showAs: opts[:title])

      # references go before the first TLCTerm; when the document defines
      # no terms, append to the references element instead
      first_term = @meta.at_xpath('./a:references/a:TLCTerm', a: NS)
      if first_term
        first_term.before(ref)
      else
        @meta.at_xpath('./a:references', a: NS) << ref
      end
    end

    # Does this Act have parts?
    def parts?
      !parts.empty?
    end

    # The part elements directly under the body
    def parts
      @body.xpath('./a:part', a: NS)
    end

    # Does this Act have chapters?
    def chapters?
      !chapters.empty?
    end

    # The chapter elements directly under the body
    def chapters
      @body.xpath('./a:chapter', a: NS)
    end

    # All section elements anywhere in the body
    def sections
      @body.xpath('.//a:section', a: NS)
    end

    # The XML node representing the definitions section, or nil
    def definitions
      # try looking for the definition list
      defn = @body.at_css('#definitions')
      return defn.parent if defn

      # try looking for the heading
      defn = @body.at_xpath('.//a:section/a:heading[text() = "Definitions"]', a: NS)
      return defn.parent if defn

      nil
    end

    # The XML node representing the schedules document
    def schedules
      @doc.at_xpath('/a:akomaNtoso/a:components/a:component/a:doc[@name="schedules"]/a:mainBody', a: NS)
    end

    # Get a map from term ids to +[term, defn]+ pairs,
    # where +term+ is the text term and +defn+ is
    # the XML node with the definition in it.
    # Terms without a matching definition node are left out.
    def term_definitions
      terms = {}

      @meta.xpath('a:references/a:TLCTerm', a: NS).each do |node|
        # <TLCTerm id="term-affected_land" href="/ontology/term/this.eng.affected_land" showAs="affected land"/>

        # find the point with id 'def-term-foo'
        defn = @body.at_xpath(".//*[@id='def-#{node['id']}']", a: NS)
        next unless defn

        terms[node['id']] = [node['showAs'], defn]
      end

      terms
    end

    # Returns the publication element, if any.
    def publication
      @meta.at_xpath('./a:publication', a: NS)
    end

    # Has this by-law been repealed?
    def repealed?
      !!repealed_on
    end

    # The date on which this act was repealed, or nil if never repealed
    def repealed_on
      repeal_el = repeal
      repeal_el ? Time.parse(repeal_el['date']) : nil
    end

    # The element representing the reference that caused the repeal of this
    # act, or nil
    def repealed_by
      repeal_el = repeal
      return nil unless repeal_el

      source_id = repeal_el['source'].sub(/^#/, '')
      @meta.at_xpath("./a:references/a:passiveRef[@id='#{source_id}']", a: NS)
    end

    # The XML element representing the repeal of this act, or nil
    def repeal
      # <lifecycle source="#this">
      #   <eventRef id="e1" date="2010-07-28" source="#original" type="generation"/>
      #   <eventRef id="e2" date="2012-04-26" source="#amendment-1" type="amendment"/>
      #   <eventRef id="e3" date="2014-01-17" source="#repeal" type="repeal"/>
      # </lifecycle>
      @meta.at_xpath('./a:lifecycle/a:eventRef[@type="repeal"]', a: NS)
    end

    # The date attribute of the manifestation's "Generation" FRBRdate
    # element, or nil if there is no such element.
    def manifestation_date
      node = @meta.at_xpath('./a:identification/a:FRBRManifestation/a:FRBRdate[@name="Generation"]', a: NS)
      node && node['date']
    end

    # The nature of this document
    def nature
      "act"
    end

    def inspect
      "<#{self.class.name} @id_uri=\"#{@id_uri}\">"
    end
  end

end
data/lib/slaw/bylaw.rb ADDED
@@ -0,0 +1,53 @@
1
+ require 'slaw/act'
2
+
3
module Slaw
  # An Act that is classed as a By-Law, backed by an AkomaNtoso XML document.
  class ByLaw < Act

    attr_accessor :region, :name

    # Read the FRBR work URI and split it into its components, e.g.
    #
    #   /za/by-law/cape-town/2010/public-parks
    def extract_id
      @id_uri = @meta.at_xpath('./a:identification/a:FRBRWork/a:FRBRuri', a: NS)['value']

      pieces = @id_uri.split('/')
      @country = pieces[1]
      @region = pieces[3]
      @name = pieces[5]

      # the date is yyyy[-mm-dd], only the year is kept
      @year = pieces[4].split('-', 2)[0]
    end

    # By-laws are identified by their short-name rather than a number
    def num
      name
    end

    # The FRBRalias value, or "(Unknown)" if there is none, with
    # " as amended" appended for amended by-laws that don't already
    # end that way. The result is memoised.
    def short_title
      return @short_title if @short_title

      alias_node = @meta.at_xpath('./a:identification/a:FRBRWork/a:FRBRalias', a: NS)
      @short_title = alias_node ? alias_node['value'] : "(Unknown)"
      @short_title += " as amended" if amended? && !@short_title.end_with?("as amended")

      @short_title
    end

    # Path component of the URL for this by-law
    def url_path
      "/#{@country}/by-law/#{@region}/#{@year}/#{@name}/"
    end

    # Base file name for this by-law
    def url_file
      @name
    end

    # The nature of this document
    def nature
      "by-law"
    end
  end
end
@@ -0,0 +1,32 @@
1
+ require 'forwardable'
2
+
3
module Slaw
  # A collection of Act instances.
  #
  # The collection is Enumerable; +each+, +<<+ and +length+ are
  # delegated to the underlying +items+ array.
  class DocumentCollection

    include Enumerable
    extend Forwardable

    attr_accessor :items

    def_delegators :items, :each, :<<, :length

    # Create a collection wrapping +items+, or an empty one if
    # no items are given.
    def initialize(items=nil)
      @items = items || []
    end

    # Find all XML files under +path+ (searched recursively) and add
    # an instance of +cls+ for each one to this collection.
    #
    # Files are visited in sorted order so that the resulting collection
    # doesn't depend on the order in which the filesystem lists them.
    def discover(path, cls=Slaw::Act)
      Dir.glob("#{path}/**/*.xml").sort.each do |fname|
        @items << cls.new(fname)
      end
    end

    # Try to find an act whose FRBRuri matches this one,
    # returning nil on failure
    def for_uri(uri)
      @items.find { |doc| doc.id_uri == uri }
    end
  end
end
@@ -0,0 +1,107 @@
1
+ require 'elasticsearch'
2
+ require 'log4r'
3
+
4
module Slaw
  # Support for indexing and search using elasticsearch
  class ElasticSearchSupport
    attr_accessor :es, :mapping, :index, :type, :base_url

    # Create a new instance.
    #
    # +index+:: name of the elasticsearch index to use
    # +type+:: name of the document type within the index
    # +base_url+:: prefix added to a document's FRBR uri to build its url
    # +client_params+:: passed to Elasticsearch::Client.new when no
    #                   client is supplied
    # +es+:: an existing client to use instead of creating one
    def initialize(index, type, base_url, client_params={}, es=nil)
      @es = es || create_client(client_params)

      # stored as @index so that the +index+ accessor reflects (and can
      # change) the index actually used by the methods below
      @index = index
      @type = type
      @base_url = base_url

      @mapping = {
        frbr_uri: {type: 'string', index: 'not_analyzed'},
        url: {type: 'string', index: 'not_analyzed'},
        title: {type: 'string', analyzer: 'english'},
        content: {type: 'string', analyzer: 'english'},
        published_on: {type: 'date', format: 'dateOptionalTime'},
        region: {type: 'string', index: 'not_analyzed'},
        region_name: {type: 'string', index: 'not_analyzed'},
        repealed: {type: 'boolean'},
      }

      # fall back to a new logger if none called 'Slaw' has been set up
      @log = Log4r::Logger['Slaw'] || Log4r::Logger.new('Slaw')
    end

    # Build an elasticsearch client from +client_params+
    def create_client(client_params)
      Elasticsearch::Client.new(client_params)
    end

    # Redefine the mapping and then index all of +docs+. The optional
    # block is passed on to +index_documents!+.
    def reindex!(docs, &block)
      define_mapping!
      index_documents!(docs, &block)
    end

    # Index each document in +docs+.
    #
    # If a block is given it is called with the document and the hash of
    # data about to be indexed, and may modify the hash (for example to
    # fill in +region_name+, which is mapped but not set here).
    #
    # +region+ is nil for documents that don't have a region, and
    # +published_on+ is nil for documents without a publication element.
    def index_documents!(docs, &block)
      docs.each do |doc|
        id = doc.id_uri.gsub('/', '-')
        publication = doc.publication

        data = {
          frbr_uri: doc.id_uri,
          url: @base_url + doc.id_uri,
          title: doc.short_title,
          content: doc.body.text,
          region: (doc.region if doc.respond_to?(:region)),
          published_on: publication && publication['date'],
          repealed: doc.repealed?,
        }

        yield doc, data if block_given?

        @log.info("Indexing #{id}")
        @es.index(index: @index, type: @type, id: id, body: data)
      end
    end

    # Ensure the index exists, drop any existing mapping for our type
    # and define the mapping afresh.
    def define_mapping!
      unless @es.indices.exists(index: @index)
        @log.info("Creating index")
        @es.indices.create(index: @index)
      end

      # delete existing mapping
      unless @es.indices.get_mapping(index: @index, type: @type).empty?
        @es.indices.delete_mapping(index: @index, type: @type)
      end

      @log.info("Defining mappings")
      @es.indices.put_mapping(index: @index, type: @type, body: {
        @type => {properties: @mapping}
      })
    end

    # Search the title and content of indexed documents for +q+, returning
    # the raw response from the client.
    #
    # +from+:: offset of the first result
    # +size+:: number of results to return
    #
    # Matches are highlighted with <mark> tags and results are sorted by
    # descending score.
    def search(q, from=0, size=10)
      @es.search(index: @index, body: {
        query: {
          multi_match: {
            query: q,
            type: 'cross_fields',
            fields: ['title', 'content'],
          }
        },
        fields: ['frbr_uri', 'repealed', 'published_on', 'title', 'url', 'region_name'],
        highlight: {
          order: "score",
          fields: {
            content: {
              fragment_size: 80,
              number_of_fragments: 2,
            },
            title: {
              number_of_fragments: 0, # entire field
            }
          },
          pre_tags: ['<mark>'],
          post_tags: ['</mark>'],
        },
        from: from,
        size: size,
        sort: {
          '_score' => {order: 'desc'}
        }
      })
    end
  end
end
@@ -0,0 +1,23 @@
1
module Slaw
  # An event in the lifecycle of an act, built from an eventRef element.
  class LifecycleEvent
    include Slaw::Namespace

    # Date of the event
    attr_accessor :date

    # type of the event
    attr_accessor :type

    # the source of the event, an XML reference element, or nil if the
    # event has no source or the source can't be found
    attr_accessor :source

    # Build the event from +element+, reading its +date+, +type+ and
    # +source+ attributes. The source attribute is a reference of the form
    # "#id" to an element inside the document's references.
    def initialize(element)
      @date = element['date']
      @type = element['type']

      # strip the leading '#' only if there is one, and cope with a
      # missing source attribute
      source_id = element['source'].to_s.sub(/\A#/, '')
      @source = if source_id.empty?
                  nil
                else
                  element.document.at_xpath("//a:references/*[@id=\"#{source_id}\"]", a: NS)
                end
    end
  end
end
@@ -0,0 +1,14 @@
1
+ require 'log4r'
2
+
3
module Slaw
  # Mixin that gives the including class a +logger+ method.
  module Logging

    # Get an instance to a logger configured for the class that includes it.
    # This allows log messages to include the class name.
    #
    # An existing logger registered under the class name is reused, otherwise
    # a new one is created. Anonymous classes have no name, so they use a
    # logger called 'Slaw'. The logger is memoised per instance.
    def logger
      @logger ||= begin
        logger_name = self.class.name || 'Slaw'
        Log4r::Logger[logger_name] || Log4r::Logger.new(logger_name)
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
module Slaw
  # Holds the XML namespace of AkomaNtoso 2.0 documents. Include this
  # module to refer to the namespace simply as +NS+.
  module Namespace
    # Frozen so that the shared constant can't be modified in place.
    NS = "http://www.akomantoso.org/2.0".freeze
  end

  # Also make the constant available as Slaw::NS
  include Namespace
end