RubyGems - d-mark - Versions diffs - 0.1 → 0.2.0 - Mend

d-mark 0.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

checksums.yaml +4 -4
data/Gemfile +4 -7
data/Gemfile.lock +16 -12
data/Guardfile +3 -0
data/NEWS.md +11 -3
data/README.adoc +218 -0
data/Rakefile +13 -2
data/d-mark.gemspec +5 -4
data/lib/d-mark.rb +2 -0
data/lib/d-mark/cli.rb +28 -0
data/lib/d-mark/parser.rb +460 -0
data/lib/{dmark → d-mark}/translator.rb +5 -3
data/lib/d-mark/version.rb +3 -0
data/samples/identifiers-and-patterns.dmark +418 -1
data/samples/trivial.dmark +1 -0
data/samples/trivial.rb +20 -0
data/spec/d-mark/parser_spec.rb +271 -0
data/spec/spec_helper.rb +2 -0
metadata +30 -18
data/README.md +0 -70
data/lib/dmark.rb +0 -9
data/lib/dmark/lexer.rb +0 -235
data/lib/dmark/nodes.rb +0 -76
data/lib/dmark/parser.rb +0 -28
data/lib/dmark/tokens.rb +0 -49
data/lib/dmark/version.rb +0 -3
data/samples/identifiers-and-patterns.html +0 -59
data/scripts/translate-to-html.rb +0 -46
data/tasks/doc.rake +0 -13
data/tasks/rubocop.rake +0 -6
data/tasks/test.rake +0 -6

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 2e6f0ea7fb496bb3aadc7ea266b4a0a7650eaa05
-  data.tar.gz: 80049f389cec0e03ecf36c7d91320f99d2e45c89
+  metadata.gz: bee205ce9623266e9787eec60d991ddc1f3b99a3
+  data.tar.gz: 490bbf23b2a0a0b8cb1767f6b7a542f38a749ba1
 SHA512:
-  metadata.gz: 65ccc586b328445e4b76e4b6c0d020055b00c209f73a6c392449b47cc4ec14aef7c3d3a4ac2e72769ca34ba270fa0855148363eb1a5d046ddb248ee865e5e079
-  data.tar.gz: 1854baf6627c1c856255d855cc0f1e4b3dcac4b273843b64c205fc0c3902e5f651b31b152598a26618225a6b21ed89e80811769c77d372f8874203a2ab80314a
+  metadata.gz: 13cabb5e530a7067fe5e665c883d42f1bf59208249e2fd2ef9fe49cf87cfa0dffa1974be4307769dbb862d51ba466403154e6bbbbf655a244a275d760b26bacd
+  data.tar.gz: e65a20361865f63a70cf4de93fa27634dcf831ac4d966c64ccb146dabd03796e277b23fdb43dd7ebeef9fdde947f2c1d800e2afeb5710ebf79ec4d9231608bc9

data/Gemfile CHANGED

@@ -2,10 +2,7 @@ source 'https://rubygems.org'
 gemspec
-group :devel do
-  gem 'guard-rake'
-  gem 'rake'
-  gem 'rspec'
-  gem 'rubocop'
-  gem 'yard'
-end
+gem 'guard'
+gem 'guard-rake'
+gem 'rspec'
+gem 'rubocop'

data/Gemfile.lock CHANGED

@@ -1,7 +1,8 @@
 PATH
   remote: .
   specs:
-    dmark (0.1)
+    d-mark (0.1.0)
+      treetop (~> 1.4)
 GEM
   remote: https://rubygems.org/
@@ -23,17 +24,18 @@ GEM
     guard-rake (1.0.0)
       guard
       rake
-    listen (3.0.5)
+    listen (3.0.6)
       rb-fsevent (>= 0.9.3)
-      rb-inotify (>= 0.9)
+      rb-inotify (>= 0.9.7)
     lumberjack (1.0.10)
     method_source (0.8.2)
-    nenv (0.2.0)
+    nenv (0.3.0)
     notiffany (0.0.8)
       nenv (~> 0.1)
       shellany (~> 0.0)
-    parser (2.3.0.2)
+    parser (2.3.0.4)
       ast (~> 2.2)
+    polyglot (0.3.5)
     powerpack (0.1.1)
     pry (0.10.3)
       coderay (~> 1.1.0)
@@ -42,7 +44,7 @@ GEM
     rainbow (2.1.0)
     rake (10.5.0)
     rb-fsevent (0.9.7)
-    rb-inotify (0.9.5)
+    rb-inotify (0.9.7)
       ffi (>= 0.5.0)
     rspec (3.4.0)
       rspec-core (~> 3.4.0)
@@ -57,28 +59,30 @@ GEM
       diff-lcs (>= 1.2.0, < 2.0)
       rspec-support (~> 3.4.0)
     rspec-support (3.4.1)
-    rubocop (0.36.0)
-      parser (>= 2.3.0.0, < 3.0)
+    rubocop (0.37.2)
+      parser (>= 2.3.0.4, < 3.0)
       powerpack (~> 0.1)
       rainbow (>= 1.99.1, < 3.0)
       ruby-progressbar (~> 1.7)
+      unicode-display_width (~> 0.3)
     ruby-progressbar (1.7.5)
     shellany (0.0.1)
     slop (3.6.0)
     thor (0.19.1)
-    yard (0.8.7.6)
+    treetop (1.6.3)
+      polyglot (~> 0.3)
+    unicode-display_width (0.3.1)
 PLATFORMS
   ruby
 DEPENDENCIES
   bundler (>= 1.11.2, < 2.0)
-  dmark!
+  d-mark!
+  guard
   guard-rake
-  rake
   rspec
   rubocop
-  yard
 BUNDLED WITH
    1.11.2

data/Guardfile ADDED

@@ -0,0 +1,3 @@
+# Re-run the default Rake task whenever a file under lib/, test/ or spec/ changes.
+guard('rake', task: 'default') { watch(%r{^(lib|test|spec)/}) }

data/NEWS.md CHANGED

@@ -1,7 +1,15 @@
 # D★Mark news
-## 0.1 (???)
+## 0.2 (2016-02-11)
-Features:
+Fixes:
-* Initial release
+* Many.
+Enhancements:
+* Many.
+## 0.1 (2016-01-31)
+Initial release.

data/README.adoc ADDED

@@ -0,0 +1,218 @@
+= D★Mark
+Denis Defreyne <denis@stoneship.org>
+CAUTION: D★Mark is experimental — use at your own risk!
+_D★Mark_ is a language for marking up prose. It facilitates writing semantically meaningful text, without limiting itself to the semantics provided by HTML or Markdown.
+Here’s an example of D★Mark:
+[source]
+----
+h2. Patterns
+para. Patterns are used to find items and layouts based on their identifier. They come in three varieties:
+list[unordered].
+  item. glob patterns
+  item. regular expression patterns
+  item. legacy patterns
+para. A glob pattern that matches every item is %pattern{/**/*}. A glob pattern that matches every item/layout with the extension %filename{md} is %glob{/**/*.md}.
+----
+== Samples
+The `samples/` directory contains some sample D★Mark files. They can be processed by invoking the appropriate script with the same filename. For example:
+....
+% bundle exec ruby samples/trivial.rb
+<p>I’m a <em>trivial</em> example!</p>
+....
+== Structure of a D★Mark document
+_D★Mark_ knows two constructs:
+Block-level elements::
+  Every non-blank line of a D★Mark document corresponds to a block. A block can be a paragraph, a list, a header, a source code listing, or more. They start with the name of the element, a period, a space character, followed by the content. For example:
++
+[source]
+----
+para. Patterns are used to find items and layouts based on their identifier. They come in three varieties.
+----
+Inline elements::
+  Inside a block, text can be marked up using inline elements, which start with a percentage sign, the name of the element, and the content within braces. For example, `%emph{crazy}` is an `emph` element with the content `crazy`.
+Block-level elements can be nested. To do so, indent the nested block two spaces deeper than the enclosing block. For example, the following defines a `list` element with three `item` elements inside it:
+[source]
+----
+list[unordered].
+  item. glob patterns
+  item. regular expression patterns
+  item. legacy patterns
+----
+Block-level elements can also include plain text. In this case, the content is not wrapped inside a nested block-level element. This is particularly useful for source code listings. For example:
+[source]
+----
+listing[lang=ruby].
+  identifier = Nanoc::Identifier.new('/about.md')
+  identifier.without_ext
+  # => "/about"
+  identifier.ext
+  # => "md"
+----
+Block-level elements and inline elements are identical in the tree representation of D★Mark. This means that any inline element can be rewritten as a block-level element.
+NOTE: To do: Elaborate on the distinction and similarity of block-level and inline elements.
+NOTE: To do: Describe escaping rules.
+=== Attributes
+Both block and inline elements can also have attributes. Attributes are enclosed in square brackets after the element name, as a comma-separated list of key-value pairs separated by an equal sign. The value part, along with the equal sign, can be omitted, in which case the value will be equal to the key name.
+For example:
+* `%code[lang=ruby]{Nanoc::VERSION}` is an inline `code` element with the `lang` attribute set to `ruby`.
+* `%only[web]{Refer to the release notes for details.}` is an inline `only` element with the `web` attribute set to `web`.
+* `h2[id=donkey]. All about donkeys` is a block-level `h2` element with the `id` attribute set to `donkey`.
+* `para[print]. This is a paragraph that only readers of the book will see.` is a block-level `para` element with the `print` attribute set to `print`.
+NOTE: The behavior of keys with missing values might change to default to booleans rather than to the key name.
+== Goals
+Be extensible::
+  D★Mark defines only the syntax of the markup language, and doesn’t bother with semantics. It does not prescribe which element names are valid in the context of a vocabulary, because it does not come with a vocabulary.
+Be simple::
+  Simplicity implies being easy to write and easy to parse. D★Mark eschews ambiguity and aims to have a short formal syntactical definition. This also means that it is easy to syntax highlight.
+Be compact::
+  Introduce as little extra syntax as possible.
+== Comparison with other languages
+D★Mark takes inspiration from a variety of other languages.
+HTML::
+  HTML is syntactically unambiguous, but comparatively more verbose than other languages. It also prescribes only a small set of elements, which makes it awkward to use for prose that requires more thorough markup. It is possible to use `span` or `div` elements with custom classes, but this approach turns an already verbose language into something even more verbose.
++
+[source,html]
+----
+<p>A glob pattern that matches every item is <span class="pattern attr-kind-glob">/**/*</span>.</p>
+----
++
+[source,d-mark]
+----
+para. A glob pattern that matches every item is %pattern[glob]{/**/*}.
+----
+XML::
+  Similar to HTML, with the major difference that XML does not prescribe a set of elements.
++
+[source,xml]
+----
+<para>A glob pattern that matches every item is <pattern kind="glob">/**/*</pattern>.</para>
+----
++
+[source,d-mark]
+----
+para. A glob pattern that matches every item is %pattern[glob]{/**/*}.
+----
+Markdown::
+  Markdown has a compact syntax, but is complex and ambiguous, as evidenced by the many different mutually incompatible implementations. It prescribes a small set of elements (smaller even than HTML). It supports embedding raw HTML, which in theory makes it possible to combine the best of both worlds, but in practice leads to markup that is harder to read than either Markdown or HTML separately, and occasionally trips up the parser and syntax highlighter.
++
+[source]
+----
+A glob pattern that matches every item is <span class="glob attr-kind-glob">/**/*</span>.
+----
++
+[source,d-mark]
+----
+para. A glob pattern that matches every item is %pattern[glob]{/**/*}.
+----
+AsciiDoc::
+  AsciiDoc, along with its AsciiDoctor variant, is a syntactically unambiguous, but complex language. It prescribes a comparatively large set of elements which translates well to DocBook and HTML. It does not support custom markup or embedding raw HTML, which makes it harder to use for prose that requires more complex markup.
++
+_(No example, as this example cannot be represented with AsciiDoc.)_
+TeX, LaTeX::
+  TeX is a Turing-complete programming language, as opposed to a markup language, intended for typesetting. This makes it impractical to use as a source format for conversion to other formats. Its syntax is simple and compact, and served as an inspiration for D★Mark.
++
+[source,latex]
+----
+A glob pattern that matches every item is \pattern[glob]{/**/*}.
+----
++
+[source,d-mark]
+----
+para. A glob pattern that matches every item is %pattern[glob]{/**/*}.
+----
+JSON, YAML::
+  JSON and YAML are data interchange formats rather than markup languages, and thus are not well-suited for marking up prose.
++
+[source,json]
+----
+[
+  "A glob pattern that matches every item is ",
+  ["pattern", {"kind": "glob"}, ["/**/*"]],
+  "."
+]
+----
++
+[source,d-mark]
+----
+para. A glob pattern that matches every item is %pattern[glob]{/**/*}.
+----
+== Specification
+NOTE: To do: write this section.
+== Programmatic usage
+Handling a D★Mark file consists of two stages: parsing and translating.
+The parsing stage converts text into a list of nodes. Construct a parser with the text as input, and call `#parse` to get the list of nodes.
+[source,ruby]
+----
+content = File.read(ARGV[0])
+nodes = DMark::Parser.new(content).parse
+----
+The translating stage is not the responsibility of D★Mark. A translator is part of the domain of the source text, and D★Mark only deals with syntax rather than semantics. A translator will run over the tree and convert it into something else (usually another string). To do so, handle each node type (`DMark::Parser::ElementNode` or `String`). For example, the following translator will convert the tree into something that resembles XML:
+[source,ruby]
+----
+class MyXMLLikeTranslator < DMark::Translator
+  def handle(node)
+    case node
+    when String
+      out << node
+    when DMark::Parser::ElementNode
+      out << "<#{node.name}>"
+      handle_children(node)
+      out << "</#{node.name}>"
+    end
+  end
+end
+result = MyXMLLikeTranslator.new(nodes).run
+puts result
+----

data/Rakefile CHANGED

@@ -1,3 +1,14 @@
-Rake.add_rakelib 'tasks'
+require 'rspec/core/rake_task'
+require 'rubocop/rake_task'
-task default: [:test, :rubocop]
+RSpec::Core::RakeTask.new(:spec) do |t|
+  t.rspec_opts = '-r ./spec/spec_helper.rb --color'
+  t.verbose = false
+end
+RuboCop::RakeTask.new(:rubocop) do |task|
+  task.options  = %w( --display-cop-names --format simple )
+  task.patterns = ['lib/**/*.rb', 'spec/**/*.rb']
+end
+task default: [:spec, :rubocop]

data/d-mark.gemspec CHANGED

@@ -1,4 +1,4 @@
-require_relative 'lib/dmark/version'
+require_relative 'lib/d-mark/version'
 Gem::Specification.new do |s|
   s.name        = 'd-mark'
@@ -13,14 +13,15 @@ Gem::Specification.new do |s|
   s.files =
     Dir['[A-Z]*'] +
-    Dir['{bin,lib,tasks,spec,samples,scripts}/**/*'] +
+    Dir['{lib,spec,samples}/**/*'] +
     ['d-mark.gemspec']
   s.require_paths = ['lib']
-  s.rdoc_options     = ['--main', 'README.md']
-  s.extra_rdoc_files = ['LICENSE', 'README.md', 'NEWS.md']
+  s.rdoc_options     = ['--main', 'README.adoc']
+  s.extra_rdoc_files = ['LICENSE', 'README.adoc', 'NEWS.md']
   s.required_ruby_version = '>= 2.1.0'
+  s.add_runtime_dependency('treetop', '~> 1.4')
   s.add_development_dependency('bundler', '>= 1.11.2', '< 2.0')
 end

data/lib/d-mark.rb ADDED

	@@ -0,0 +1,2 @@
1	+ require_relative 'd-mark/parser'
2	+ require_relative 'd-mark/translator'

data/lib/d-mark/cli.rb ADDED

@@ -0,0 +1,28 @@
+require_relative '../d-mark'
+data = File.read(ARGV[0]).strip
+parser = DMark::Parser.new(data)
+begin
+  before = Time.now
+  result = parser.parse
+  after = Time.now
+  result.each do |tree|
+    puts tree.inspect
+    puts
+  end
+  puts "parse duration: #{(after - before).to_f}s"
+rescue => e
+  case e
+  when DMark::Parser::ParserError
+    line = data.lines[e.line_nr]
+    puts "\e[31mError:\e[0m #{e.message}}"
+    puts
+    puts line
+    puts "\e[31m" + ' ' * e.col_nr + '↑' + "\e[0m"
+    exit 1
+  else
+    raise e
+  end
+end

data/lib/d-mark/parser.rb ADDED

@@ -0,0 +1,460 @@
+module DMark
+  class Parser
+    class ParserError < StandardError
+      attr_reader :line_nr
+      attr_reader :col_nr
+      def initialize(line_nr, col_nr, msg)
+        @line_nr = line_nr
+        @col_nr = col_nr
+        @msg = msg
+        super("parse error at line #{@line_nr + 1}, col #{@col_nr + 1}: #{@msg}")
+      end
+    end
+    class ElementNode
+      attr_reader :name
+      attr_reader :attributes
+      attr_reader :children
+      def initialize(name, attributes, children)
+        @name = name
+        @attributes = attributes
+        @children = children
+      end
+      def inspect
+        io = ''
+        io << 'Element(' << @name << ', '
+        if @attributes.any?
+          io << @attributes.inspect
+          io << ', '
+        end
+        io << @children.inspect
+        io << ')'
+        io
+      end
+      def ==(other)
+        case other
+        when ElementNode
+          @name == other.name &&
+            @children == other.children &&
+            @attributes == other.attributes
+        else
+          false
+        end
+      end
+    end
+    attr_reader :pos
+    def initialize(input)
+      @input = input
+      @input_chars = @input.chars
+      @pos = 0
+      @col_nr = 0
+      @line_nr = 0
+    end
+    def parse
+      res = []
+      loop do
+        break if eof?
+        res << read_block_with_children
+      end
+      res
+    end
+    ##########
+    def peek_char(pos = @pos)
+      if eof?
+        nil
+      else
+        @input_chars[pos]
+      end
+    end
+    def eof?(pos = @pos)
+      pos >= @input_chars.size
+    end
+    def advance
+      if !eof? && @input_chars[@pos] == "\n"
+        @line_nr += 1
+        @col_nr = 0
+      end
+      @pos += 1
+      @col_nr += 1
+    end
+    def read_char(c)
+      char = peek_char
+      if char != c
+        raise_parse_error("expected #{c.inspect}, but got #{char.nil? ? 'EOF' : char.inspect}")
+      else
+        advance
+        char
+      end
+    end
+    ##########
+    # Reads one block at the given indentation level, then keeps consuming the
+    # lines that follow for as long as they are blank or indented at least one
+    # level deeper. Deeper lines that start a block are parsed recursively and
+    # appended as children; other deeper lines are appended as inline content.
+    # Returns the block's ElementNode.
+    def read_block_with_children(indentation = 0)
+      res = read_single_block
+      # Blank lines seen so far that have not been turned into "\n" children.
+      pending_blanks = 0
+      until eof?
+        blank_pos = try_read_blank_line
+        if blank_pos
+          # Skip the blank line by jumping past it; the counters are updated by
+          # hand because advance is not used here.
+          @pos = blank_pos
+          @line_nr += 1
+          @col_nr = 0
+          pending_blanks += 1
+        else
+          sub_indentation = detect_indentation
+          # A line that is not indented deeper than this block ends it.
+          break if sub_indentation < indentation + 1
+          read_indentation(indentation + 1)
+          if try_read_block_start
+            res.children << read_block_with_children(indentation + 1)
+          else
+            # Plain content: separate it from earlier children with a newline
+            # and re-emit the blank lines that preceded it.
+            res.children << "\n" unless res.children.empty?
+            pending_blanks.times { res.children << "\n" }
+            pending_blanks = 0
+            res.children.concat(read_inline_content)
+            read_end_of_inline_content
+          end
+        end
+      end
+      res
+    end
+    def try_read_blank_line
+      pos = @pos
+      loop do
+        case peek_char(pos)
+        when ' '
+          pos += 1
+        when nil
+          break pos + 1
+        when "\n"
+          break pos + 1
+        else
+          break nil
+        end
+      end
+    end
+    # FIXME: ugly and duplicated
+    def try_read_block_start
+      old_pos = @pos
+      success =
+        if try_read_identifier_head
+          if try_read_identifier_tail
+            case peek_char
+            when '['
+              true
+            when '.'
+              advance
+              [' ', "\n", nil].include?(peek_char)
+            end
+          end
+        end
+      @pos = old_pos
+      success
+    end
+    # FIXME: ugly and duplicated
+    def try_read_identifier_head
+      char = peek_char
+      case char
+      when 'a'..'z'
+        advance
+        char
+      end
+    end
+    # FIXME: ugly and duplicated
+    def try_read_identifier_tail
+      res = ''
+      loop do
+        char = peek_char
+        case char
+        when 'a'..'z', '-', '0'..'9'
+          advance
+          res << char
+        else
+          break
+        end
+      end
+      res.to_s
+    end
+    def detect_indentation
+      indentation_chars = 0
+      pos = @pos
+      loop do
+        case peek_char(pos)
+        when ' '
+          pos += 1
+          indentation_chars += 1
+        else
+          break
+        end
+      end
+      indentation_chars / 2
+    end
+    def read_until_eol_or_eof
+      res = ''
+      loop do
+        char = peek_char
+        case char
+        when "\n"
+          advance
+          break
+        when nil
+          break
+        else
+          advance
+          res << char
+        end
+      end
+      res.to_s
+    end
+    def read_indentation(indentation)
+      indentation.times do
+        read_char(' ')
+        read_char(' ')
+      end
+    end
+    def read_single_block
+      identifier = read_identifier
+      attributes =
+        if peek_char == '['
+          read_attributes
+        else
+          {}
+        end
+      read_char('.')
+      case peek_char
+      when nil, "\n"
+        advance
+        ElementNode.new(identifier, attributes, [])
+      else
+        read_char(' ')
+        content = read_inline_content
+        read_end_of_inline_content
+        ElementNode.new(identifier, attributes, content)
+      end
+    end
+    def read_end_of_inline_content
+      char = peek_char
+      case char
+      when "\n", nil
+        advance
+      when '}'
+        raise_parse_error('unexpected } -- try escaping it as "%}"')
+      else
+        raise_parse_error('unexpected content')
+      end
+    end
+    def read_identifier
+      a = read_identifier_head
+      b = read_identifier_tail
+      "#{a}#{b}"
+    end
+    def read_identifier_head
+      char = peek_char
+      case char
+      when 'a'..'z'
+        advance
+        char
+      else
+        raise_parse_error("expected an identifier, but got #{char.inspect}")
+      end
+    end
+    def read_identifier_tail
+      res = ''
+      loop do
+        char = peek_char
+        case char
+        when 'a'..'z', '-', '0'..'9'
+          advance
+          res << char
+        else
+          break
+        end
+      end
+      res.to_s
+    end
+    def read_attributes
+      read_char('[')
+      res = {}
+      at_start = true
+      loop do
+        char = peek_char
+        case char
+        when ']'
+          advance
+          break
+        else
+          read_char(',') unless at_start
+          key = read_attribute_key
+          if peek_char == '='
+            read_char('=')
+            value = read_attribute_value
+          else
+            value = key
+          end
+          res[key] = value
+          at_start = false
+        end
+      end
+      res
+    end
+    # Reads an attribute key; keys are read exactly like element identifiers.
+    def read_attribute_key
+      read_identifier
+    end
+    def read_attribute_value
+      res = ''
+      is_escaping = false
+      loop do
+        char = peek_char
+        if is_escaping
+          case char
+          when nil, "\n"
+            break
+          else
+            advance
+            res << char
+            is_escaping = false
+          end
+        else
+          case char
+          when nil, "\n", ']', ','
+            break
+          when '%'
+            advance
+            is_escaping = true
+          else
+            advance
+            res << char
+          end
+        end
+      end
+      res.to_s
+    end
+    def read_inline_content
+      res = []
+      loop do
+        char = peek_char
+        case char
+        when "\n", nil
+          break
+        when '}'
+          break
+        when '%'
+          advance
+          res << read_percent_body
+        else
+          res << read_string
+        end
+      end
+      res
+    end
+    def read_string
+      res = ''
+      loop do
+        char = peek_char
+        case char
+        when nil, "\n", '%', '}'
+          break
+        else
+          advance
+          res << char
+        end
+      end
+      res.to_s
+    end
+    def read_percent_body
+      char = peek_char
+      case char
+      when '%', '}'
+        advance
+        char.to_s
+      when nil, "\n"
+        raise_parse_error("expected something after %")
+      else
+        read_inline_element
+      end
+    end
+    def read_inline_element
+      name = read_identifier
+      attributes =
+        if peek_char == '['
+          read_attributes
+        else
+          {}
+        end
+      read_char('{')
+      contents = read_inline_content
+      read_char('}')
+      ElementNode.new(name, attributes, contents)
+    end
+    def raise_parse_error(msg)
+      raise ParserError.new(@line_nr, @col_nr, msg)
+    end
+  end
+end