slaw 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: a0da8d0d88cfd753f8ef854937248c8881440390
4
+ data.tar.gz: 0762a47f6b0bac65d4b3fe829bc2a4997f3b23f9
5
+ SHA512:
6
+ metadata.gz: 4ab788b276cd06d1735bb859a1f7fa08820f9bf65b2c6282df65ea1fd2303cbd5b42433366a3a0b2a7a20dbe227e78cc6b5caa2ab3b5cb988d6c2a27097f05ce
7
+ data.tar.gz: 3882e5a3b292dfcd9adecb0b2077f9cb21a5d5e76e90402a09c77f146a1ec3acb0272649daf03cc64556864bfd5d8a921d873cbb20534c626542894a02218372
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source 'https://rubygems.org'
2
+ ruby '2.1.1'
3
+
4
+ # Specify your gem's dependencies in slaw.gemspec
5
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Greg Kempe
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,31 @@
1
+ # Slaw
2
+
3
+ Slaw is a lightweight library for rendering and generating Akoma Ntoso acts from plain text and PDF documents.
4
+ It is used to power [openbylaws.org.za](http://openbylaws.org.za).
5
+
6
+ ## Installation
7
+
8
+ Add this line to your application's Gemfile:
9
+
10
+ gem 'slaw'
11
+
12
+ And then execute:
13
+
14
+ $ bundle
15
+
16
+ Or install it yourself as:
17
+
18
+ $ gem install slaw
19
+
20
+ ## Usage
21
+
22
+ TODO: Write usage instructions here
23
+
24
+ ## Contributing
25
+
26
+ 1. Fork it ( http://github.com/longhotsummer/slaw/fork )
27
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
28
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
29
+ 4. Push to the branch (`git push origin my-new-feature`)
30
+ 5. Create a new Pull Request
31
+
data/Rakefile ADDED
@@ -0,0 +1,7 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ begin
4
+ require 'rspec/core/rake_task'
5
+ RSpec::Core::RakeTask.new(:spec)
6
+ rescue LoadError
7
+ end
data/lib/slaw/act.rb ADDED
@@ -0,0 +1,243 @@
1
require 'time' # Time.parse (used by #repealed_on) lives in the stdlib 'time' library

module Slaw
  # Wraps an AkomaNtoso 2.0 XML document describing an Act.
  class Act
    include Slaw::Namespace

    # Allow us to jump from the XML document for an act to the
    # Act instance itself.
    #
    # This is a class variable, so the registry is shared by Act and all of
    # its subclasses. Nothing in this class ever removes an entry.
    @@acts = {}

    attr_accessor :doc, :meta, :body, :num, :year, :id_uri
    attr_accessor :filename, :mtime

    # Look up the Act that was parsed from the document owning +node+.
    # Returns nil when no Act has been registered for that document.
    def self.for_node(node)
      @@acts[node.document]
    end

    # Create a new instance, loading it from +filename+ when one is given.
    def initialize(filename=nil)
      self.load(filename) if filename
    end

    # Load the XML from +filename+, remembering the filename and the
    # file's modification time.
    def load(filename)
      @filename = filename
      @mtime = File.mtime(@filename)

      File.open(filename) { |f| parse(f) }
    end

    # Parse the XML contained in the file-like object +io+, locate the
    # +meta+ and +body+ elements, register this instance against the parsed
    # document and extract the identifying details from the FRBR URI.
    def parse(io)
      @doc = Nokogiri::XML(io)
      @meta = @doc.at_xpath('/a:akomaNtoso/a:act/a:meta', a: NS)
      @body = @doc.at_xpath('/a:akomaNtoso/a:act/a:body', a: NS)

      @@acts[@doc] = self

      extract_id
    end

    # Populate +id_uri+, +num+, +year+ and the country from the FRBRuri of
    # the work. The URI is split on '/' into a leading empty segment,
    # country, type, date and number, in that order.
    def extract_id
      @id_uri = @meta.at_xpath('./a:identification/a:FRBRWork/a:FRBRuri', a: NS)['value']
      _empty, @country, _type, date, @num = @id_uri.split('/')

      # yyyy-mm-dd
      @year = date.split('-', 2)[0]
    end

    # The value of the FRBRalias element if there is one, otherwise
    # "Act <num> of <year>". The result is cached once it is non-nil.
    def short_title
      return @short_title if @short_title

      node = @meta.at_xpath('./a:identification/a:FRBRWork/a:FRBRalias', a: NS)
      @short_title = node ? node['value'] : "Act #{num} of #{year}"
    end

    def url_path
      "/#{@country}/acts/#{@year}/#{@num}/"
    end

    def url_file
      "act-#{@year}-#{@num}"
    end

    # Has this act been amended? True unless the act element's +contains+
    # attribute is 'originalVersion'.
    def amended?
      @doc.at_xpath('/a:akomaNtoso/a:act', a: NS)['contains'] != 'originalVersion'
    end

    # a list of LifecycleEvent objects for amendment events, in date order
    def amendment_events
      events = @meta.xpath('./a:lifecycle/a:eventRef[@type="amendment"]', a: NS).map do |event|
        LifecycleEvent.new(event)
      end

      events.sort_by { |e| e.date }
    end

    # Mark this act as being amended by another act, either +act+
    # or the details in +opts+:
    #
    #   :uri: uri of the amending act
    #   :title: title of the amending act
    #   :date: date of the amendment
    #
    # Values in +opts+ take precedence over those read from +act+. The
    # +opts+ hash passed in by the caller is not modified.
    #
    # It is assumed that there can be only one amendment event on a particular
    # date. An existing amendment on this date is overwritten.
    #
    # Raises ArgumentError if no amendment date can be determined; this is
    # checked before the document is touched so that a failed call does not
    # leave it half-updated.
    def amended_by!(act, opts={})
      # work on a copy so the caller's hash is left alone
      opts = opts.dup

      if act
        opts[:uri] ||= act.id_uri
        opts[:title] ||= act.short_title

        unless opts[:date]
          # the amending act may not have a publication element
          publication = act.publication
          opts[:date] = publication['date'] if publication
        end
      end

      date = opts[:date]
      raise ArgumentError, "an amendment date is required" unless date

      source_id = "amendment-#{date}"

      # assume we now hold a single version and not the original version
      @doc.at_xpath('/a:akomaNtoso/a:act', a: NS)['contains'] = 'singleVersion'

      # add the lifecycle event
      lifecycle = @meta.at_xpath('./a:lifecycle', a: NS)
      unless lifecycle
        lifecycle = @doc.create_element('lifecycle', source: "#this")
        @meta.at_xpath('./a:publication', a: NS).after(lifecycle)
      end

      event = lifecycle.at_xpath("./a:eventRef[@date=\"#{date}\"][@type=\"amendment\"]", a: NS)
      if event
        # clear up old event: its source attribute is "#<id>", which is
        # used here as a CSS id selector to find the old reference
        src = @doc.at_css(event['source'])
        src.remove if src
      else
        # new event
        event = @doc.create_element('eventRef', type: 'amendment')
        lifecycle << event
      end

      event['date'] = date
      event['id'] = "amendment-event-#{date}"
      event['source'] = '#' + source_id

      # add reference
      ref = @doc.create_element('passiveRef',
                                id: source_id,
                                href: opts[:uri],
                                showAs: opts[:title])

      # keep the reference ahead of the term definitions when there are
      # any; an act without TLCTerm entries gets it appended instead
      first_term = @meta.at_xpath('./a:references/a:TLCTerm', a: NS)
      if first_term
        first_term.before(ref)
      else
        @meta.at_xpath('./a:references', a: NS) << ref
      end
    end

    # Does this Act have parts?
    def parts?
      !parts.empty?
    end

    # The part elements that are direct children of the body
    def parts
      @body.xpath('./a:part', a: NS)
    end

    # Does this Act have chapters?
    def chapters?
      !chapters.empty?
    end

    # The chapter elements that are direct children of the body
    def chapters
      @body.xpath('./a:chapter', a: NS)
    end

    # All section elements anywhere inside the body
    def sections
      @body.xpath('.//a:section', a: NS)
    end

    # The XML node representing the definitions section, or nil when
    # neither lookup below finds one.
    def definitions
      # try looking for the definition list
      defn = @body.at_css('#definitions')
      return defn.parent if defn

      # try looking for the heading
      defn = @body.at_xpath('.//a:section/a:heading[text() = "Definitions"]', a: NS)
      return defn.parent if defn

      nil
    end

    # The XML node representing the schedules document
    def schedules
      @doc.at_xpath('/a:akomaNtoso/a:components/a:component/a:doc[@name="schedules"]/a:mainBody', a: NS)
    end

    # Get a map from term ids to +[term, defn]+ pairs,
    # where +term+ is the text term and +defn+ is
    # the XML node with the definition in it.
    #
    # Terms with no matching definition node in the body are left out.
    def term_definitions
      terms = {}

      @meta.xpath('a:references/a:TLCTerm', a: NS).each do |node|
        # <TLCTerm id="term-affected_land" href="/ontology/term/this.eng.affected_land" showAs="affected land"/>

        # find the point with id 'def-term-foo'
        defn = @body.at_xpath(".//*[@id='def-#{node['id']}']", a: NS)
        next unless defn

        terms[node['id']] = [node['showAs'], defn]
      end

      terms
    end

    # Returns the publication element, if any.
    def publication
      @meta.at_xpath('./a:publication', a: NS)
    end

    # Has this by-law been repealed?
    def repealed?
      !!repealed_on
    end

    # The date on which this act was repealed (as a Time parsed from the
    # repeal event's date attribute), or nil if never repealed
    def repealed_on
      repeal_el = repeal
      repeal_el ? Time.parse(repeal_el['date']) : nil
    end

    # The element representing the reference that caused the repeal of this
    # act, or nil
    def repealed_by
      repeal_el = repeal
      return nil unless repeal_el

      source_id = repeal_el['source'].sub(/^#/, '')
      @meta.at_xpath("./a:references/a:passiveRef[@id='#{source_id}']", a: NS)
    end

    # The XML element representing the repeal of this act, or nil
    def repeal
      # <lifecycle source="#this">
      #   <eventRef id="e1" date="2010-07-28" source="#original" type="generation"/>
      #   <eventRef id="e2" date="2012-04-26" source="#amendment-1" type="amendment"/>
      #   <eventRef id="e3" date="2014-01-17" source="#repeal" type="repeal"/>
      # </lifecycle>
      @meta.at_xpath('./a:lifecycle/a:eventRef[@type="repeal"]', a: NS)
    end

    # The date attribute of the manifestation's "Generation" FRBRdate
    # element, or nil when there is no such element.
    def manifestation_date
      node = @meta.at_xpath('./a:identification/a:FRBRManifestation/a:FRBRdate[@name="Generation"]', a: NS)
      node && node['date']
    end

    def nature
      "act"
    end

    def inspect
      "<#{self.class.name} @id_uri=\"#{@id_uri}\">"
    end
  end

end
data/lib/slaw/bylaw.rb ADDED
@@ -0,0 +1,53 @@
1
+ require 'slaw/act'
2
+
3
module Slaw
  # An Act classed as a By-Law, backed by an AkomaNtoso XML document.
  class ByLaw < Act

    attr_accessor :region, :name

    # Read the FRBR URI of the work and pull the country, region, year
    # and short name out of it.
    #
    # Example URI: /za/by-law/cape-town/2010/public-parks
    def extract_id
      uri_node = @meta.at_xpath('./a:identification/a:FRBRWork/a:FRBRuri', a: NS)
      @id_uri = uri_node['value']

      # segment 0 is the empty string before the leading slash and
      # segment 2 is the document type; neither is kept
      segments = @id_uri.split('/')
      @country = segments[1]
      @region = segments[3]
      date = segments[4]
      @name = segments[5]

      # yyyy[-mm-dd]
      @year = date.split('-', 2).first
    end

    # ByLaws don't have numbers, use their short-name instead
    def num
      name
    end

    # The FRBRalias value, or "(Unknown)" when there is no alias element,
    # with " as amended" appended for amended by-laws whose title does not
    # already end that way. The result is cached once it is non-nil.
    def short_title
      return @short_title if @short_title

      alias_node = @meta.at_xpath('./a:identification/a:FRBRWork/a:FRBRalias', a: NS)
      @short_title = alias_node ? alias_node['value'] : "(Unknown)"

      if amended? && !@short_title.end_with?("as amended")
        @short_title += " as amended"
      end

      @short_title
    end

    def url_path
      "/#{@country}/by-law/#{@region}/#{@year}/#{@name}/"
    end

    def url_file
      @name
    end

    def nature
      "by-law"
    end
  end
end
@@ -0,0 +1,32 @@
1
+ require 'forwardable'
2
+
3
module Slaw
  # An enumerable container of Act instances.
  class DocumentCollection

    extend Forwardable
    include Enumerable

    # +each+, +<<+ and +length+ are handed straight to the underlying list
    def_delegators :items, :each, :<<, :length

    attr_accessor :items

    # Wrap +items+, or start with an empty list when none are given.
    def initialize(items=nil)
      @items = items ? items : []
    end

    # Search +path+ recursively for XML files and add an instance of
    # +cls+, built from each filename, to this collection.
    def discover(path, cls=Slaw::Act)
      Dir.glob("#{path}/**/*.xml").each do |xml_file|
        @items << cls.new(xml_file)
      end
    end

    # Find the first document whose FRBRuri (+id_uri+) equals +uri+,
    # or nil when there is no such document.
    def for_uri(uri)
      @items.find { |candidate| candidate.id_uri == uri }
    end
  end
end
@@ -0,0 +1,107 @@
1
+ require 'elasticsearch'
2
+ require 'log4r'
3
+
4
module Slaw
  # Support for indexing and search using elasticsearch
  class ElasticSearchSupport
    attr_accessor :es, :mapping, :type, :base_url

    # +index+:: name of the elasticsearch index to use
    # +type+:: document type to index under
    # +base_url+:: prefix added to a document's FRBR URI to build its url
    # +client_params+:: passed to Elasticsearch::Client.new when no +es+
    #                   client is supplied
    # +es+:: an existing client to use instead of creating one
    def initialize(index, type, base_url, client_params={}, es=nil)
      @es = es || create_client(client_params)

      @ix = index
      @type = type
      @base_url = base_url

      @mapping = {
        frbr_uri: {type: 'string', index: 'not_analyzed'},
        url: {type: 'string', index: 'not_analyzed'},
        title: {type: 'string', analyzer: 'english'},
        content: {type: 'string', analyzer: 'english'},
        published_on: {type: 'date', format: 'dateOptionalTime'},
        region: {type: 'string', index: 'not_analyzed'},
        region_name: {type: 'string', index: 'not_analyzed'},
        repealed: {type: 'boolean'},
      }

      # Logger[] returns nil when nothing has registered a 'Slaw' logger,
      # so fall back to creating one rather than failing on the first
      # log call
      @log = Log4r::Logger['Slaw'] || Log4r::Logger.new('Slaw')
    end

    # Name of the index that is indexed into and searched. The name is
    # kept in @ix, which every request in this class reads.
    def index
      @ix
    end

    # Change the index used by subsequent requests.
    def index=(name)
      @ix = name
    end

    def create_client(client_params)
      Elasticsearch::Client.new(client_params)
    end

    # (Re)define the mapping and then index +docs+. The optional block is
    # passed on to #index_documents!.
    def reindex!(docs, &block)
      define_mapping!
      index_documents!(docs, &block)
    end

    # Index each document in +docs+. The document id is the FRBR URI with
    # every '/' replaced by '-'.
    #
    # When a block is given it is called with each document and the hash
    # about to be indexed, so that the caller can adjust or add fields.
    #
    # +region+ is nil for documents that do not respond to +region+, and
    # +published_on+ is nil for documents with no publication element.
    def index_documents!(docs, &block)
      docs.each do |doc|
        id = doc.id_uri.gsub('/', '-')
        publication = doc.publication

        data = {
          frbr_uri: doc.id_uri,
          url: @base_url + doc.id_uri,
          title: doc.short_title,
          content: doc.body.text,
          region: (doc.region if doc.respond_to?(:region)),
          published_on: publication && publication['date'],
          repealed: doc.repealed?,
        }

        yield doc, data if block_given?

        @log.info("Indexing #{id}")
        @es.index(index: @ix, type: @type, id: id, body: data)
      end
    end

    # Make sure the index exists and replace any existing mapping for
    # +type+ with +mapping+.
    def define_mapping!
      unless @es.indices.exists(index: @ix)
        @log.info("Creating index")
        @es.indices.create(index: @ix)
      end

      # delete existing mapping
      unless @es.indices.get_mapping(index: @ix, type: @type).empty?
        @es.indices.delete_mapping(index: @ix, type: @type)
      end

      @log.info("Defining mappings")
      @es.indices.put_mapping(index: @ix, type: @type, body: {
        @type => {properties: @mapping}
      })
    end

    # Search the title and content fields for +q+, returning +size+
    # results starting at offset +from+, best score first, with matches
    # highlighted using <mark> tags. Returns the client's response.
    def search(q, from=0, size=10)
      @es.search(index: @ix, body: {
        query: {
          multi_match: {
            query: q,
            type: 'cross_fields',
            fields: ['title', 'content'],
          }
        },
        fields: ['frbr_uri', 'repealed', 'published_on', 'title', 'url', 'region_name'],
        highlight: {
          order: "score",
          fields: {
            content: {
              fragment_size: 80,
              number_of_fragments: 2,
            },
            title: {
              number_of_fragments: 0, # entire field
            }
          },
          pre_tags: ['<mark>'],
          post_tags: ['</mark>'],
        },
        from: from,
        size: size,
        sort: {
          '_score' => {order: 'desc'}
        }
      })
    end
  end
end
@@ -0,0 +1,23 @@
1
module Slaw
  # An event in the lifecycle of an act
  class LifecycleEvent
    include Slaw::Namespace

    # Date of the event
    attr_accessor :date

    # type of the event
    attr_accessor :type

    # the source of the event, an XML reference element, or nil when the
    # event has no source attribute or the reference cannot be found
    attr_accessor :source

    # Build the event from +element+, an eventRef XML element. Its
    # +source+ attribute is expected to look like "#some-id"; the element
    # with that id is looked up under the document's references.
    def initialize(element)
      @date = element['date']
      @type = element['type']
      @source = nil

      source_ref = element['source']
      return unless source_ref

      # strip only a leading '#', as Act#repealed_by does
      source_id = source_ref.sub(/\A#/, '')
      @source = element.document.at_xpath("//a:references/*[@id=\"#{source_id}\"]", a: NS)
    end
  end
end
@@ -0,0 +1,14 @@
1
+ require 'log4r'
2
+
3
module Slaw
  module Logging

    # The logger for the including class, named after that class so that
    # log messages can carry the class name. An already registered logger
    # of that name is reused, otherwise a new one is created. The result
    # is cached on the instance.
    def logger
      @logger ||= begin
        logger_name = self.class.name
        Log4r::Logger[logger_name] || Log4r::Logger.new(logger_name)
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
module Slaw
  # Holds the XML namespace constant used in XPath queries. Classes
  # include this module so that they can refer to NS directly.
  module Namespace
    # Frozen so that the shared namespace URI cannot be modified in place.
    NS = "http://www.akomantoso.org/2.0".freeze
  end

  # makes the constant available as Slaw::NS as well
  include Namespace
end