rosetta-ruby 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.DS_Store +0 -0
- data/.gitignore +58 -0
- data/.rubocop.yml +8 -0
- data/.vscode/settings.json +3 -0
- data/Gemfile +7 -0
- data/Gemfile.lock +16 -0
- data/README.md +48 -0
- data/TODOS.md +11 -0
- data/lib/rosetta/abstract_syntax_tree.rb +28 -0
- data/lib/rosetta/formatters/html/html_generator.rb +96 -0
- data/lib/rosetta/formatters/html/html_writer.rb +19 -0
- data/lib/rosetta/service_base.rb +8 -0
- data/lib/rosetta/services/inline_token_resolver.rb +80 -0
- data/lib/rosetta/services/input_scanner.rb +15 -0
- data/lib/rosetta/services/token_resolver.rb +42 -0
- data/lib/rosetta/services/tree_parser.rb +126 -0
- data/lib/rosetta/tokens/basic_list.rb +10 -0
- data/lib/rosetta/tokens/basic_list_item.rb +19 -0
- data/lib/rosetta/tokens/basic_token.rb +51 -0
- data/lib/rosetta/tokens/block_quote.rb +10 -0
- data/lib/rosetta/tokens/block_token.rb +36 -0
- data/lib/rosetta/tokens/bold.rb +21 -0
- data/lib/rosetta/tokens/break.rb +20 -0
- data/lib/rosetta/tokens/code_block.rb +10 -0
- data/lib/rosetta/tokens/code_block_delimiter.rb +18 -0
- data/lib/rosetta/tokens/header.rb +42 -0
- data/lib/rosetta/tokens/inline_code.rb +25 -0
- data/lib/rosetta/tokens/italics.rb +21 -0
- data/lib/rosetta/tokens/line_break.rb +18 -0
- data/lib/rosetta/tokens/link.rb +66 -0
- data/lib/rosetta/tokens/new_line.rb +20 -0
- data/lib/rosetta/tokens/numbered_list.rb +10 -0
- data/lib/rosetta/tokens/numbered_list_item.rb +25 -0
- data/lib/rosetta/tokens/paragraph.rb +10 -0
- data/lib/rosetta/tokens/quote.rb +24 -0
- data/lib/rosetta/tokens/shared/inline_tokens.rb +30 -0
- data/lib/rosetta/tokens/strikethrough.rb +29 -0
- data/lib/rosetta/tokens/text.rb +14 -0
- data/lib/rosetta-ruby.rb +15 -0
- data/main.rb +21 -0
- data/rosetta-ruby.gemspec +16 -0
- data/samples/all.md +50 -0
- data/samples/bold.md +1 -0
- data/samples/code_blocks.md +5 -0
- data/samples/headers.md +11 -0
- data/samples/link.md +3 -0
- data/samples/lists.md +10 -0
- data/samples/quotes.md +6 -0
- data/samples/text.md +5 -0
- metadata +91 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
+---
+SHA256:
+  metadata.gz: 48ee42bc64f9c983f926292355d3b43924f2ed9f46650218ce0f18b3cca357b5
+  data.tar.gz: 95d425b76360875deed5853a549b2f25cee0c3b96ce9b9a97187243564885c62
+SHA512:
+  metadata.gz: 45e4186535f9b6a4093d6c90d751e188838ba75d3744bfa6b45d52bb20e66f15d1c2e98758f9c08cdc17e48cec06ede4bbe9d644ae815fa9a75ca377f07b2cec
+  data.tar.gz: 7d8b107a44460bfcf0dc67ddc24be074625ffa20a42e5e5755c06e583c58755b9fdb6125f50504660a66592846af8f91ad752966727c99d8eb4cdd596f4748a9
data/.DS_Store
ADDED
Binary file
data/.gitignore
ADDED
@@ -0,0 +1,58 @@
+*.gem
+*.rbc
+/.config
+/coverage/
+/InstalledFiles
+/pkg/
+/spec/reports/
+/spec/examples.txt
+/test/tmp/
+/test/version_tmp/
+/tmp/
+
+# Used by dotenv library to load environment variables.
+# .env
+
+# Ignore Byebug command history file.
+.byebug_history
+
+/.vscode/
+
+## Specific to RubyMotion:
+.dat*
+.repl_history
+build/
+*.bridgesupport
+build-iPhoneOS/
+build-iPhoneSimulator/
+
+## Specific to RubyMotion (use of CocoaPods):
+#
+# We recommend against adding the Pods directory to your .gitignore. However
+# you should judge for yourself, the pros and cons are mentioned at:
+# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
+#
+# vendor/Pods/
+
+## Documentation cache and generated files:
+/.yardoc/
+/_yardoc/
+/doc/
+/rdoc/
+
+## Environment normalization:
+/.bundle/
+/vendor/bundle
+/lib/bundler/man/
+
+# for a library or gem, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# Gemfile.lock
+# .ruby-version
+# .ruby-gemset
+
+# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
+.rvmrc
+
+# Used by RuboCop. Remote config files pulled in from inherit_from directive.
+# .rubocop-https?--*
data/.rubocop.yml
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
data/README.md
ADDED
@@ -0,0 +1,48 @@
+# rosetta
+
+A Markdown to HTML translator.
+
+**[April 15th 2021] This project is a work in progress.**
+
+I've recently taken an interest in the syntax and semantics of programming languages, and would
+like to explore some of the practices involved in writing one. Having said that, I don't want to
+dive into writing a full featured programming language for various reasons, the primary one being
+that I don't have any use cases where an existing language could not trivially satisfy my programming
+needs.
+
+With that in mind I'm going to start smaller with a translator that takes a Markdown source file
+and outputs HTML. In an ideal world I'd use it as part of another project, but regardless of whether
+I get that far I'm going to enjoy digging into the challenges involved.
+
+My initial plan is to try and parse Markdown source into an abstract syntax tree of some sort, and then convert
+that AST into HTML. I'll start with a _subset_ of GitHub flavoured Markdown; I use GitHub's Markdown
+on a near daily basis, and if I want to use this elsewhere I probably won't have need for a full
+Markdown feature set.
+
+## Markdown syntax
+
+Rosetta will support the following features. For examples of what these look like in use,
+visit [the GitHub Markdown Guide](https://guides.github.com/features/mastering-markdown/).
+I'll mark each item as completed once Rosetta can convert them to its AST representation.
+
+### Block level
+
+- [x] Headers from 1-6
+- [x] Numbered and bulleted lists, as well as sub lists
+- [x] Quotes
+- [x] Standalone code blocks
+- [x] Block URL links
+- [ ] Block image embedding
+
+### Inline
+
+- [x] Bold, italic, strikethrough
+- [x] Inline URL links
+- [x] Inline code blocks
+- [ ] Inline image embedding
+
+### Possible future syntax
+
+- [ ] Superscript
+- [ ] Subscript
+- [ ] Code block language types
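
The README above describes a two-stage design: parse Markdown into an AST, then convert the AST to HTML. main.rb is listed in this diff (+21 lines) but not expanded, so the exact wiring is not shown; based on the file list and the classes that appear later in this diff, an end-to-end run presumably looks something like the following sketch (illustrative only):

```ruby
require './lib/rosetta/services/input_scanner'
require './lib/rosetta/abstract_syntax_tree'
require './lib/rosetta/formatters/html/html_writer'

# Illustrative pipeline only -- main.rb is not expanded in this diff, so the
# exact wiring is an assumption based on the class names shown below.
markdown = File.read('./samples/all.md')

tokens = InputScanner.call(markdown)      # source text -> flat token list
ast    = AbstractSyntaxTree.new(tokens)   # flat tokens -> token tree
html   = HTMLWriter.call(ast.token_tree)  # token tree -> HTML string

puts html
```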
data/TODOS.md
ADDED
@@ -0,0 +1,11 @@
+- [ ] Refactor to accept string input instead of a file
+- [x] Refactor Scanner into action
+- [x] Move HTML generation into formatter folder
+- [ ] General refactor
+- [ ] Gemify
+- [ ] Support inline nesting
+- [ ] Support block nesting
+- [ ] Revisit token matching
+- [ ] Implement visitor pattern for matching tokens?
+- [ ] Implement visitor pattern for consuming tokens?
+- [ ] Add support for escaping delimiters (eg \*)
data/lib/rosetta/abstract_syntax_tree.rb
ADDED
@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+
+require './lib/rosetta/services/tree_parser'
+
+# Holds all logic for creating the AST, including parsing.
+class AbstractSyntaxTree
+  attr_reader :base_tokens
+
+  def initialize(base_tokens)
+    @base_tokens = base_tokens
+  end
+
+  def token_tree
+    @token_tree ||= TreeParser.call(@base_tokens)
+  end
+
+  def formatted_token_tree
+    token_tree.map(&:to_s)
+  end
+
+  def node_representation
+    token_tree.map(&:node_representation)
+  end
+
+  def raw_tokens
+    @base_tokens.map(&:to_s)
+  end
+end
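
TreeParser.call(@base_tokens) above, and the ServiceBase subclasses later in this diff, all follow the service-object convention of invoking a class-level call. service_base.rb itself is listed in the file list (+8 lines) but not expanded here, so the following is only a guess at its shape:

```ruby
# Hypothetical reconstruction -- lib/rosetta/service_base.rb is not expanded
# in this diff, so this is an assumption based on how it is used.
class ServiceBase
  # Lets callers write SomeService.call(args) instead of
  # SomeService.new(args).call, which matches TreeParser.call(...) above.
  def self.call(*args)
    new(*args).call
  end
end
```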
data/lib/rosetta/formatters/html/html_generator.rb
ADDED
@@ -0,0 +1,96 @@
+# frozen_string_literal: true
+
+# Generates HTML for a given AST token.
+class HTMLGenerator
+  def initialize
+    define_header_generators
+  end
+
+  def generate(token)
+    token.accept(self)
+  end
+
+  def generate_basic_list(token)
+    generate_list(token.children, 'ul')
+  end
+
+  def generate_numbered_list(token)
+    generate_list(token.children, 'ol')
+  end
+
+  def generate_list_item(token)
+    "<li>#{token.value}</li>\n"
+  end
+
+  def generate_block_quote(token)
+    child_tokens = token.children.map { |child_token| generate(child_token) }
+
+    ["<pre><blockquote>\n", child_tokens, "</blockquote></pre>\n"].join
+  end
+
+  def generate_code_block(token)
+    child_tokens = token.children.map { |child_token| generate(child_token) }
+
+    ["<pre><code>\n", child_tokens, "</code></pre>\n"].compact.join
+  end
+
+  def generate_new_line(_token)
+    "\n"
+  end
+
+  def generate_quote(token)
+    "#{token.value}\n"
+  end
+
+  def generate_text(token)
+    token.value
+  end
+
+  def generate_break(_token)
+    "</br>\n"
+  end
+
+  def generate_paragraph(token)
+    child_tokens = token.children.map { |child_token| generate(child_token) }
+
+    ["<p>\n", child_tokens, "</p>\n"].join
+  end
+
+  def generate_strikethrough(token)
+    "<del>#{token.value}</del>"
+  end
+
+  def generate_italics(token)
+    "<em>#{token.value}</em>"
+  end
+
+  def generate_inline_code(token)
+    "<code>#{token.value}</code>"
+  end
+
+  def generate_bold(token)
+    "<b>#{token.value}</b>"
+  end
+
+  def generate_link(token)
+    "<a href='#{token.url}'>#{token.value}</a>"
+  end
+
+  private
+
+  def define_header_generators
+    (1..6).each do |header_level|
+      method_name = "generate_header_#{header_level}"
+
+      self.class.define_method(method_name) do |token|
+        "<h#{header_level}>#{token.value}</h#{header_level}>\n"
+      end
+    end
+  end
+
+  def generate_list(child_tokens, tag)
+    child_token_strings = child_tokens.map { |child_token| generate_list_item(child_token) }
+
+    ["<#{tag}>\n", child_token_strings, "</#{tag}>\n"].join
+  end
+end
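
HTMLGenerator#generate hands control back to the token via token.accept(self), so each token class is expected to call the generate_* method that matches its type (TODOS.md above already mentions moving towards a visitor pattern for this). The token classes are not expanded in this section of the diff, so the interface below is an assumption, shown for a single token type:

```ruby
# Hypothetical sketch of the accept interface HTMLGenerator relies on; the
# real token classes (e.g. data/lib/rosetta/tokens/bold.rb) are not expanded
# in this section of the diff.
class Bold
  attr_reader :value

  def initialize(value)
    @value = value
  end

  # Double dispatch: the generator calls token.accept(self), and the token
  # calls back into the generator method that knows how to render it.
  def accept(generator)
    generator.generate_bold(self)
  end
end
```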
data/lib/rosetta/formatters/html/html_writer.rb
ADDED
@@ -0,0 +1,19 @@
+# frozen_string_literal: true
+
+require './lib/rosetta/service_base'
+require './lib/rosetta/formatters/html/html_generator'
+
+# Turns a tree of tokens into it's corresponding HTML output.
+class HTMLWriter < ServiceBase
+  def initialize(token_tree)
+    @token_tree = token_tree
+  end
+
+  def call
+    @token_tree.map { |token| html_generator.generate(token) }.compact.join
+  end
+
+  def html_generator
+    @html_generator ||= HTMLGenerator.new
+  end
+end
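
HTMLWriter simply concatenates whatever HTMLGenerator returns for each top-level token in the tree. A minimal illustrative call, assuming the ServiceBase call convention and the accept dispatch sketched above:

```ruby
# Illustrative only: Text and NewLine appear later in this diff, but their
# accept implementations are not shown, so the exact output is an assumption.
HTMLWriter.call([Text.new('Hello, world'), NewLine.new])
# presumably => "Hello, world\n"
```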
data/lib/rosetta/services/inline_token_resolver.rb
ADDED
@@ -0,0 +1,80 @@
+# frozen_string_literal: true
+
+require './lib/rosetta/service_base'
+require './lib/rosetta/tokens/text'
+require './lib/rosetta/tokens/bold'
+require './lib/rosetta/tokens/italics'
+require './lib/rosetta/tokens/strikethrough'
+require './lib/rosetta/tokens/inline_code'
+
+# Matches and extracts tokens contained in text.
+class InlineTokenResolver < ServiceBase
+  def initialize(line)
+    @line = line
+    @tokens = []
+    @current_counter = 0
+    @base_counter = 0
+  end
+
+  def call
+    resolve_text_into_tokens until end_of_line?
+    consume_plain_text_up_to(@current_counter)
+
+    @tokens << NewLine.new
+    @tokens
+  end
+
+  private
+
+  def end_of_line?
+    @current_counter >= @line.length - 1
+  end
+
+  def resolve_text_into_tokens
+    matching_token_type = match_token_type
+
+    if matching_token_type.nil?
+      @current_counter += 1
+    else
+      handle_matching_token(matching_token_type)
+    end
+  end
+
+  def match_token_type
+    current_line = @line[@current_counter..]
+
+    inline_token_classes.find do |token_type|
+      token_type.matches?(current_line)
+    end
+  end
+
+  def handle_matching_token(token_type)
+    # We don't want to consume the token type delimiter.
+    consume_plain_text_up_to(@current_counter - 1) unless @current_counter.zero?
+    @tokens << consume(token_type)
+  end
+
+  def consume_plain_text_up_to(final_index)
+    plain_text = @line[@base_counter..final_index]
+    @base_counter = @current_counter
+    @tokens << Text.new(plain_text) unless plain_text.length.zero?
+  end
+
+  def inline_token_classes
+    @inline_token_classes ||= BasicToken::INLINE_CLASS_NAMES.map { |type| constantize_type(type) }
+  end
+
+  def consume(token_type)
+    token = token_type.consume(@line[@base_counter..])
+    length_consumed = token.source_text.length
+
+    @current_counter += length_consumed
+    @base_counter = @current_counter
+
+    token
+  end
+
+  def constantize_type(type)
+    Object.const_get(type)
+  end
+end
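
InlineTokenResolver leans on each inline token class exposing a class-level matches? and consume, plus an instance-level source_text whose length tells the resolver how far to advance its counters. Those token files aren't expanded in this section, so here is a minimal sketch of the assumed contract, using a made-up stand-in class rather than the gem's real ones:

```ruby
# Hypothetical stand-in showing the contract InlineTokenResolver assumes of
# inline token classes; the real Bold/Italics/InlineCode files are not
# expanded in this section of the diff.
class BacktickCodeExample
  DELIMITER = '`'

  attr_reader :value, :source_text

  # matches? receives the remainder of the line from the current counter.
  def self.matches?(text)
    text.start_with?(DELIMITER)
  end

  # consume receives the remainder of the line and must return a token whose
  # source_text covers everything it consumed, so the resolver can advance.
  def self.consume(text)
    value = text[1..].split(DELIMITER).first.to_s
    new(value, "#{DELIMITER}#{value}#{DELIMITER}")
  end

  def initialize(value, source_text)
    @value = value
    @source_text = source_text
  end
end
```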
data/lib/rosetta/services/input_scanner.rb
ADDED
@@ -0,0 +1,15 @@
+# frozen_string_literal: true
+
+require './lib/rosetta/service_base'
+require './lib/rosetta/services/token_resolver'
+
+# Scans and tokenises source text.
+class InputScanner < ServiceBase
+  def initialize(input)
+    @input = input
+  end
+
+  def call
+    @input.split("\n").map { |line| TokenResolver.call(line) }.flatten
+  end
+end
data/lib/rosetta/services/token_resolver.rb
ADDED
@@ -0,0 +1,42 @@
+# frozen_string_literal: true
+
+require './lib/rosetta/service_base'
+require './lib/rosetta/services/inline_token_resolver'
+require './lib/rosetta/tokens/basic_list_item'
+require './lib/rosetta/tokens/code_block_delimiter'
+require './lib/rosetta/tokens/header'
+require './lib/rosetta/tokens/line_break'
+require './lib/rosetta/tokens/link'
+require './lib/rosetta/tokens/numbered_list_item'
+require './lib/rosetta/tokens/quote'
+require './lib/rosetta/tokens/text'
+require './lib/rosetta/tokens/basic_token'
+
+# Resolves token types from source text.
+class TokenResolver < ServiceBase
+  def initialize(text)
+    @text = text
+  end
+
+  def call
+    top_level_token_classes.each do |type_class|
+      return type_class.new(@text) if type_class.matches?(@text)
+    end
+
+    resolve_text_block
+  end
+
+  private
+
+  def top_level_token_classes
+    BasicToken::TOP_LEVEL_CLASS_NAMES.map { |type| constantize_type(type) }
+  end
+
+  def resolve_text_block
+    InlineTokenResolver.call(@text)
+  end
+
+  def constantize_type(type)
+    Object.const_get(type)
+  end
+end
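
Both resolvers constantize class names from BasicToken::TOP_LEVEL_CLASS_NAMES and BasicToken::INLINE_CLASS_NAMES, but basic_token.rb (+51 lines) is not expanded in this diff. The constants presumably look something like the sketch below; the exact names and ordering are assumptions inferred from the require lists above and the generate_* methods in HTMLGenerator:

```ruby
# Hypothetical sketch only -- the real data/lib/rosetta/tokens/basic_token.rb
# is not expanded in this diff, so these lists are assumptions.
class BasicToken
  # Checked first, line by line, by TokenResolver#call.
  TOP_LEVEL_CLASS_NAMES = %w[
    Header
    BasicListItem
    NumberedListItem
    Quote
    CodeBlockDelimiter
    LineBreak
    Link
  ].freeze

  # Matched inside otherwise plain text by InlineTokenResolver.
  INLINE_CLASS_NAMES = %w[Bold Italics Strikethrough InlineCode].freeze
end
```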