slaw 0.6.0 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/slaw +20 -0
- data/lib/slaw/act.rb +28 -17
- data/lib/slaw/generator.rb +3 -0
- data/lib/slaw/parse/builder.rb +19 -3
- data/lib/slaw/version.rb +1 -1
- data/lib/slaw/za/act.treetop +1 -1
- data/lib/slaw/za/act_nodes.rb +7 -7
- metadata +2 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 75915642fffe0d65b3d4386745c12cd019e3d715
         | 
| 4 | 
            +
              data.tar.gz: fb183f90a301b479fd4d422d7da479f8796e808d
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: acf9d6a07f1a927461394721228a8ebd6548fe79c5604e59f36463bdb4bd0a25df1bafa8aa391a937ed45756f1a70f85e9c48469d752c92817c04f392f62c169
         | 
| 7 | 
            +
              data.tar.gz: 4ada4ce9e47e9d71bdcf55f1fc8c94c7977e03711b46d0ec86ceae17f8df5a2efcc7a23aa171adf967a0c24a3fd889d80f2aefdee3ebd6c6fb0f0893f14598d6
         | 
    
        data/bin/slaw
    CHANGED
    
    | @@ -12,9 +12,15 @@ class SlawCLI < Thor | |
| 12 12 | 
             
              option :input, enum: ['text', 'pdf'], desc: "Type of input if it can't be determined automatically"
         | 
| 13 13 | 
             
              option :pdftotext, desc: "Location of the pdftotext binary if not in PATH"
         | 
| 14 14 | 
             
              option :definitions, type: :boolean, desc: "Find and link definitions (this can be slow). Default: false"
         | 
| 15 | 
            +
              option :fragment, type: :string, desc: "Akoma Ntoso element name that the imported text represents. Support depends on the grammar."
         | 
| 16 | 
            +
              option :id_prefix, type: :string, desc: "Prefix to be used when generating ID elements when parsing a fragment."
         | 
| 15 17 | 
             
              def parse(name)
         | 
| 16 18 | 
             
                logging
         | 
| 17 19 |  | 
| 20 | 
            +
                if options[:fragment] and options[:definitions]
         | 
| 21 | 
            +
                  raise Thor::Error.new("--definitions can't be used together with --fragment")
         | 
| 22 | 
            +
                end
         | 
| 23 | 
            +
             | 
| 18 24 | 
             
                Slaw::Extract::Extractor.pdftotext_path = options[:pdftotext] if options[:pdftotext]
         | 
| 19 25 | 
             
                extractor = Slaw::Extract::Extractor.new
         | 
| 20 26 |  | 
| @@ -28,6 +34,20 @@ class SlawCLI < Thor | |
| 28 34 | 
             
                end
         | 
| 29 35 |  | 
| 30 36 | 
             
                generator = Slaw::ActGenerator.new
         | 
| 37 | 
            +
             | 
| 38 | 
            +
                if options[:fragment]
         | 
| 39 | 
            +
                  generator.document_class = Slaw::Fragment
         | 
| 40 | 
            +
                  fragment = options[:fragment]
         | 
| 41 | 
            +
                  fragment = 'act' if fragment.downcase == 'akomantoso'
         | 
| 42 | 
            +
                  generator.builder.parse_options[:root] = fragment
         | 
| 43 | 
            +
             | 
| 44 | 
            +
                  if options[:id_prefix]
         | 
| 45 | 
            +
                    prefix = options[:id_prefix]
         | 
| 46 | 
            +
                    prefix += "." unless prefix.end_with?('.')
         | 
| 47 | 
            +
                    generator.builder.fragment_id_prefix = prefix
         | 
| 48 | 
            +
                  end
         | 
| 49 | 
            +
                end
         | 
| 50 | 
            +
             | 
| 31 51 | 
             
                act = generator.generate_from_text(text)
         | 
| 32 52 |  | 
| 33 53 | 
             
                # definitions?
         | 
    
        data/lib/slaw/act.rb
    CHANGED
    
    | @@ -1,4 +1,31 @@ | |
| 1 1 | 
             
            module Slaw
         | 
| 2 | 
            +
              class AknBase
         | 
| 3 | 
            +
                include Slaw::Namespace
         | 
| 4 | 
            +
             | 
| 5 | 
            +
                attr_accessor :doc
         | 
| 6 | 
            +
             | 
| 7 | 
            +
                # Serialise the XML for this act, passing `args` to the Nokogiri serialiser.
         | 
| 8 | 
            +
                # The most useful argument is usually `indent: 2` if you like your XML perdy.
         | 
| 9 | 
            +
                #
         | 
| 10 | 
            +
                # @return [String] serialized XML
         | 
| 11 | 
            +
                def to_xml(*args)
         | 
| 12 | 
            +
                  @doc.to_xml(*args)
         | 
| 13 | 
            +
                end
         | 
| 14 | 
            +
                
         | 
| 15 | 
            +
                # Parse the XML contained in the file-like or String object `io`
         | 
| 16 | 
            +
                #
         | 
| 17 | 
            +
                # @param io [String, file-like] io object or String with XML
         | 
| 18 | 
            +
                def parse(io)
         | 
| 19 | 
            +
                  self.doc = Nokogiri::XML(io)
         | 
| 20 | 
            +
                end
         | 
| 21 | 
            +
              end
         | 
| 22 | 
            +
             | 
| 23 | 
            +
              # A fragment is a part of a larger document and doesn't have the context associated
         | 
| 24 | 
            +
              # with the document.
         | 
| 25 | 
            +
              class Fragment < AknBase
         | 
| 26 | 
            +
                alias_method :fragment, :doc
         | 
| 27 | 
            +
              end
         | 
| 28 | 
            +
             | 
| 2 29 | 
             
              # An Act wraps a single {http://www.akomantoso.org/ AkomaNtoso 2.0 XML} act document in the form of a
         | 
| 3 30 | 
             
              # Nokogiri::XML::Document object.
         | 
| 4 31 | 
             
              #
         | 
| @@ -7,8 +34,7 @@ module Slaw | |
| 7 34 | 
             
              # identifying whether it has been amended ({#amended?}), repealed
         | 
| 8 35 | 
             
              # ({#repealed?}) or what chapters ({#chapters}), parts ({#parts}) and
         | 
| 9 36 | 
             
              # sections ({#sections}) it contains.
         | 
| 10 | 
            -
              class Act
         | 
| 11 | 
            -
                include Slaw::Namespace
         | 
| 37 | 
            +
              class Act < AknBase
         | 
| 12 38 |  | 
| 13 39 | 
             
                # Allow us to jump from the XML document for an act to the
         | 
| 14 40 | 
             
                # Act instance itself
         | 
| @@ -66,13 +92,6 @@ module Slaw | |
| 66 92 |  | 
| 67 93 | 
             
                  File.open(filename) { |f| parse(f) }
         | 
| 68 94 | 
             
                end
         | 
| 69 | 
            -
                
         | 
| 70 | 
            -
                # Parse the XML contained in the file-like or String object `io`
         | 
| 71 | 
            -
                #
         | 
| 72 | 
            -
                # @param io [String, file-like] io object or String with XML
         | 
| 73 | 
            -
                def parse(io)
         | 
| 74 | 
            -
                  self.doc = Nokogiri::XML(io)
         | 
| 75 | 
            -
                end
         | 
| 76 95 |  | 
| 77 96 | 
             
                # Set the XML document backing this bylaw.
         | 
| 78 97 | 
             
                #
         | 
| @@ -403,14 +422,6 @@ module Slaw | |
| 403 422 | 
             
                  validate.empty?
         | 
| 404 423 | 
             
                end
         | 
| 405 424 |  | 
| 406 | 
            -
                # Serialise the XML for this act, passing `args` to the Nokogiri serialiser.
         | 
| 407 | 
            -
                # The most useful argument is usually `indent: 2` if you like your XML perdy.
         | 
| 408 | 
            -
                #
         | 
| 409 | 
            -
                # @return [String] serialized XML
         | 
| 410 | 
            -
                def to_xml(*args)
         | 
| 411 | 
            -
                  @doc.to_xml(*args)
         | 
| 412 | 
            -
                end
         | 
| 413 | 
            -
             | 
| 414 425 | 
             
                def inspect
         | 
| 415 426 | 
             
                  "<#{self.class.name} @id_uri=\"#{@id_uri}\">"
         | 
| 416 427 | 
             
                end
         | 
    
        data/lib/slaw/generator.rb
    CHANGED
    
    | @@ -9,6 +9,9 @@ module Slaw | |
| 9 9 | 
             
                # [Slaw::Parse::Builder] builder used by the generator
         | 
| 10 10 | 
             
                attr_accessor :builder
         | 
| 11 11 |  | 
| 12 | 
            +
                # The type that will hold the generated document
         | 
| 13 | 
            +
                attr_accessor :document_class
         | 
| 14 | 
            +
             | 
| 12 15 | 
             
                def initialize
         | 
| 13 16 | 
             
                  @parser = Slaw::ZA::ActParser.new
         | 
| 14 17 | 
             
                  @builder = Slaw::Parse::Builder.new(parser: @parser)
         | 
    
        data/lib/slaw/parse/builder.rb
    CHANGED
    
    | @@ -26,6 +26,12 @@ module Slaw | |
| 26 26 |  | 
| 27 27 | 
             
                  @@parsers = {}
         | 
| 28 28 |  | 
| 29 | 
            +
                  # Additional hash of options to be provided to the parser when parsing.
         | 
| 30 | 
            +
                  attr_accessor :parse_options
         | 
| 31 | 
            +
             | 
| 32 | 
            +
                  # Prefix to use when generating IDs for fragments
         | 
| 33 | 
            +
                  attr_accessor :fragment_id_prefix
         | 
| 34 | 
            +
             | 
| 29 35 | 
             
                  # Create a new builder.
         | 
| 30 36 | 
             
                  #
         | 
| 31 37 | 
             
                  # Specify either `:parser` or `:grammar_file` and `:grammar_class`.
         | 
| @@ -49,6 +55,8 @@ module Slaw | |
| 49 55 | 
             
                    else
         | 
| 50 56 | 
             
                      raise ArgumentError.new("Specify either :parser or :grammar_file and :grammar_class")
         | 
| 51 57 | 
             
                    end
         | 
| 58 | 
            +
             | 
| 59 | 
            +
                    @parse_options = {}
         | 
| 52 60 | 
             
                  end
         | 
| 53 61 |  | 
| 54 62 | 
             
                  # Do all the work necessary to parse text into a well-formed XML document.
         | 
| @@ -65,7 +73,7 @@ module Slaw | |
| 65 73 | 
             
                  # resulting XML to normalise it.
         | 
| 66 74 | 
             
                  #
         | 
| 67 75 | 
             
                  # @param text [String] the text to parse
         | 
| 68 | 
            -
                  # @param parse_options [Hash] options to  | 
| 76 | 
            +
                  # @param parse_options [Hash] options to pass to the parser
         | 
| 69 77 | 
             
                  #
         | 
| 70 78 | 
             
                  # @return [String] an XML string
         | 
| 71 79 | 
             
                  def parse_text(text, parse_options={})
         | 
| @@ -76,11 +84,12 @@ module Slaw | |
| 76 84 | 
             
                  # Parse plain text into a syntax tree.
         | 
| 77 85 | 
             
                  #
         | 
| 78 86 | 
             
                  # @param text [String] the text to parse
         | 
| 79 | 
            -
                  # @param parse_options [Hash] options to  | 
| 87 | 
            +
                  # @param parse_options [Hash] options to pass to the parser
         | 
| 80 88 | 
             
                  #
         | 
| 81 89 | 
             
                  # @return [Object] the root of the resulting parse tree, usually a Treetop::Runtime::SyntaxNode object
         | 
| 82 90 | 
             
                  def text_to_syntax_tree(text, parse_options={})
         | 
| 83 91 | 
             
                    logger.info("Parsing...")
         | 
| 92 | 
            +
                    parse_options = @parse_options.dup.update(parse_options)
         | 
| 84 93 | 
             
                    tree = @parser.parse(text, parse_options)
         | 
| 85 94 | 
             
                    logger.info("Parsed!")
         | 
| 86 95 |  | 
| @@ -107,7 +116,14 @@ module Slaw | |
| 107 116 | 
             
                    builder.akomaNtoso("xmlns:xsi"=> "http://www.w3.org/2001/XMLSchema-instance", 
         | 
| 108 117 | 
             
                                       "xsi:schemaLocation" => "http://www.akomantoso.org/2.0 akomantoso20.xsd",
         | 
| 109 118 | 
             
                                       "xmlns" => NS) { |b|
         | 
| 110 | 
            -
                       | 
| 119 | 
            +
                      args = [b]
         | 
| 120 | 
            +
             | 
| 121 | 
            +
                      # should we provide an id prefix?
         | 
| 122 | 
            +
                      arity = tree.method('to_xml').arity 
         | 
| 123 | 
            +
                      arity = arity.abs-1 if arity < 0
         | 
| 124 | 
            +
                      args << (fragment_id_prefix || "") if arity > 1
         | 
| 125 | 
            +
             | 
| 126 | 
            +
                      tree.to_xml(*args)
         | 
| 111 127 | 
             
                    }
         | 
| 112 128 |  | 
| 113 129 | 
             
                    s
         | 
    
        data/lib/slaw/version.rb
    CHANGED
    
    
    
        data/lib/slaw/za/act.treetop
    CHANGED
    
    | @@ -82,7 +82,7 @@ module Slaw | |
| 82 82 | 
             
                    #
         | 
| 83 83 | 
             
                    # Additionally, the section title is optional.
         | 
| 84 84 | 
             
                    !{ |s| options[:section_number_after_title] }
         | 
| 85 | 
            -
                    section_title_prefix section_title:section_title_content? eol?
         | 
| 85 | 
            +
                    space? section_title_prefix section_title:section_title_content? eol?
         | 
| 86 86 | 
             
                    <SectionTitleType2>
         | 
| 87 87 | 
             
                  end
         | 
| 88 88 |  | 
    
        data/lib/slaw/za/act_nodes.rb
    CHANGED
    
    | @@ -7,7 +7,7 @@ module Slaw | |
| 7 7 | 
             
                    EXPRESSION_URI = "#{FRBR_URI}/eng@"
         | 
| 8 8 | 
             
                    MANIFESTATION_URI = EXPRESSION_URI
         | 
| 9 9 |  | 
| 10 | 
            -
                    def to_xml(b)
         | 
| 10 | 
            +
                    def to_xml(b, idprefix)
         | 
| 11 11 | 
             
                      b.act(contains: "originalVersion") { |b|
         | 
| 12 12 | 
             
                        write_meta(b)
         | 
| 13 13 | 
             
                        write_preamble(b)
         | 
| @@ -192,7 +192,7 @@ module Slaw | |
| 192 192 |  | 
| 193 193 | 
             
                        idprefix = "#{id}."
         | 
| 194 194 |  | 
| 195 | 
            -
                        subsections.elements.each_with_index { |e, i| e.to_xml(b,  | 
| 195 | 
            +
                        subsections.elements.each_with_index { |e, i| e.to_xml(b, idprefix, i) }
         | 
| 196 196 | 
             
                      }
         | 
| 197 197 | 
             
                    end
         | 
| 198 198 | 
             
                  end
         | 
| @@ -231,7 +231,7 @@ module Slaw | |
| 231 231 | 
             
                  end
         | 
| 232 232 |  | 
| 233 233 | 
             
                  class Subsection < Treetop::Runtime::SyntaxNode
         | 
| 234 | 
            -
                    def to_xml(b,  | 
| 234 | 
            +
                    def to_xml(b, idprefix, i=0)
         | 
| 235 235 | 
             
                      if statement.is_a?(NumberedStatement)
         | 
| 236 236 | 
             
                        attribs = {id: idprefix + statement.num.gsub(/[()]/, '')}
         | 
| 237 237 | 
             
                      else
         | 
| @@ -246,9 +246,9 @@ module Slaw | |
| 246 246 | 
             
                        b.content { |b| 
         | 
| 247 247 | 
             
                          if blocklist and blocklist.is_a?(Blocklist)
         | 
| 248 248 | 
             
                            if statement.content
         | 
| 249 | 
            -
                              blocklist.to_xml(b,  | 
| 249 | 
            +
                              blocklist.to_xml(b, idprefix, i) { |b| b << statement.content.text_value }
         | 
| 250 250 | 
             
                            else
         | 
| 251 | 
            -
                              blocklist.to_xml(b,  | 
| 251 | 
            +
                              blocklist.to_xml(b, idprefix, i)
         | 
| 252 252 | 
             
                            end
         | 
| 253 253 | 
             
                          else
         | 
| 254 254 | 
             
                            # raw content
         | 
| @@ -283,7 +283,7 @@ module Slaw | |
| 283 283 | 
             
                  class Blocklist < Treetop::Runtime::SyntaxNode
         | 
| 284 284 | 
             
                    # Render a block list to xml. If a block is given,
         | 
| 285 285 | 
             
                    # yield to it a builder to insert a listIntroduction node
         | 
| 286 | 
            -
                    def to_xml(b,  | 
| 286 | 
            +
                    def to_xml(b, idprefix, i=0, &block)
         | 
| 287 287 | 
             
                      id = idprefix + "list#{i}"
         | 
| 288 288 | 
             
                      idprefix = id + '.'
         | 
| 289 289 |  | 
| @@ -382,7 +382,7 @@ module Slaw | |
| 382 382 | 
             
                      end
         | 
| 383 383 | 
             
                    end
         | 
| 384 384 |  | 
| 385 | 
            -
                    def to_xml(b, i)
         | 
| 385 | 
            +
                    def to_xml(b, idprefix, i=0)
         | 
| 386 386 | 
             
                      n = num.nil? ? i : num
         | 
| 387 387 | 
             
                      id = "schedule-#{n}"
         | 
| 388 388 |  | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: slaw
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.6. | 
| 4 | 
            +
              version: 0.6.2
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Greg Kempe
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2015-04- | 
| 11 | 
            +
            date: 2015-04-30 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: bundler
         |