rosetta-ruby 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.DS_Store +0 -0
- data/.gitignore +58 -0
- data/.rubocop.yml +8 -0
- data/.vscode/settings.json +3 -0
- data/Gemfile +7 -0
- data/Gemfile.lock +16 -0
- data/README.md +48 -0
- data/TODOS.md +11 -0
- data/lib/rosetta/abstract_syntax_tree.rb +28 -0
- data/lib/rosetta/formatters/html/html_generator.rb +96 -0
- data/lib/rosetta/formatters/html/html_writer.rb +19 -0
- data/lib/rosetta/service_base.rb +8 -0
- data/lib/rosetta/services/inline_token_resolver.rb +80 -0
- data/lib/rosetta/services/input_scanner.rb +15 -0
- data/lib/rosetta/services/token_resolver.rb +42 -0
- data/lib/rosetta/services/tree_parser.rb +126 -0
- data/lib/rosetta/tokens/basic_list.rb +10 -0
- data/lib/rosetta/tokens/basic_list_item.rb +19 -0
- data/lib/rosetta/tokens/basic_token.rb +51 -0
- data/lib/rosetta/tokens/block_quote.rb +10 -0
- data/lib/rosetta/tokens/block_token.rb +36 -0
- data/lib/rosetta/tokens/bold.rb +21 -0
- data/lib/rosetta/tokens/break.rb +20 -0
- data/lib/rosetta/tokens/code_block.rb +10 -0
- data/lib/rosetta/tokens/code_block_delimiter.rb +18 -0
- data/lib/rosetta/tokens/header.rb +42 -0
- data/lib/rosetta/tokens/inline_code.rb +25 -0
- data/lib/rosetta/tokens/italics.rb +21 -0
- data/lib/rosetta/tokens/line_break.rb +18 -0
- data/lib/rosetta/tokens/link.rb +66 -0
- data/lib/rosetta/tokens/new_line.rb +20 -0
- data/lib/rosetta/tokens/numbered_list.rb +10 -0
- data/lib/rosetta/tokens/numbered_list_item.rb +25 -0
- data/lib/rosetta/tokens/paragraph.rb +10 -0
- data/lib/rosetta/tokens/quote.rb +24 -0
- data/lib/rosetta/tokens/shared/inline_tokens.rb +30 -0
- data/lib/rosetta/tokens/strikethrough.rb +29 -0
- data/lib/rosetta/tokens/text.rb +14 -0
- data/lib/rosetta-ruby.rb +15 -0
- data/main.rb +21 -0
- data/rosetta-ruby.gemspec +16 -0
- data/samples/all.md +50 -0
- data/samples/bold.md +1 -0
- data/samples/code_blocks.md +5 -0
- data/samples/headers.md +11 -0
- data/samples/link.md +3 -0
- data/samples/lists.md +10 -0
- data/samples/quotes.md +6 -0
- data/samples/text.md +5 -0
- metadata +91 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
+---
+SHA256:
+  metadata.gz: 48ee42bc64f9c983f926292355d3b43924f2ed9f46650218ce0f18b3cca357b5
+  data.tar.gz: 95d425b76360875deed5853a549b2f25cee0c3b96ce9b9a97187243564885c62
+SHA512:
+  metadata.gz: 45e4186535f9b6a4093d6c90d751e188838ba75d3744bfa6b45d52bb20e66f15d1c2e98758f9c08cdc17e48cec06ede4bbe9d644ae815fa9a75ca377f07b2cec
+  data.tar.gz: 7d8b107a44460bfcf0dc67ddc24be074625ffa20a42e5e5755c06e583c58755b9fdb6125f50504660a66592846af8f91ad752966727c99d8eb4cdd596f4748a9
data/.DS_Store
ADDED
Binary file
data/.gitignore
ADDED
@@ -0,0 +1,58 @@
+*.gem
+*.rbc
+/.config
+/coverage/
+/InstalledFiles
+/pkg/
+/spec/reports/
+/spec/examples.txt
+/test/tmp/
+/test/version_tmp/
+/tmp/
+
+# Used by dotenv library to load environment variables.
+# .env
+
+# Ignore Byebug command history file.
+.byebug_history
+
+/.vscode/
+
+## Specific to RubyMotion:
+.dat*
+.repl_history
+build/
+*.bridgesupport
+build-iPhoneOS/
+build-iPhoneSimulator/
+
+## Specific to RubyMotion (use of CocoaPods):
+#
+# We recommend against adding the Pods directory to your .gitignore. However
+# you should judge for yourself, the pros and cons are mentioned at:
+# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
+#
+# vendor/Pods/
+
+## Documentation cache and generated files:
+/.yardoc/
+/_yardoc/
+/doc/
+/rdoc/
+
+## Environment normalization:
+/.bundle/
+/vendor/bundle
+/lib/bundler/man/
+
+# for a library or gem, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# Gemfile.lock
+# .ruby-version
+# .ruby-gemset
+
+# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
+.rvmrc
+
+# Used by RuboCop. Remote config files pulled in from inherit_from directive.
+# .rubocop-https?--*
data/.rubocop.yml
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
data/README.md
ADDED
@@ -0,0 +1,48 @@
+# rosetta
+
+A Markdown to HTML translator.
+
+**[April 15th 2021] This project is a work in progress.**
+
+I've recently taken an interest in the syntax and semantics of programming languages, and would
+like to explore some of the practices involved in writing one. Having said that, I don't want to
+dive into writing a full featured programming language for various reasons, the primary one being
+that I don't have any use cases where an existing language could not trivially satisfy my programming
+needs.
+
+With that in mind I'm going to start smaller with a translator that takes a Markdown source file
+and outputs HTML. In an ideal world I'd use it as part of another project, but regardless of whether
+I get that far I'm going to enjoy digging into the challenges involved.
+
+My initial plan is to try and parse Markdown source into an abstract syntax tree of some sort, and then convert
+that AST into HTML. I'll start with a _subset_ of GitHub flavoured Markdown; I use GitHub's Markdown
+on a near daily basis, and if I want to use this elsewhere I probably won't have need for a full
+Markdown feature set.
+
+## Markdown syntax
+
+Rosetta will support the following features. For examples of what these look like in use,
+visit [the GitHub Markdown Guide](https://guides.github.com/features/mastering-markdown/).
+I'll mark each item as completed once Rosetta can convert them to its AST representation.
+
+### Block level
+
+- [x] Headers from 1-6
+- [x] Numbered and bulleted lists, as well as sub lists
+- [x] Quotes
+- [x] Standalone code blocks
+- [x] Block URL links
+- [ ] Block image embedding
+
+### Inline
+
+- [x] Bold, italic, strikethrough
+- [x] Inline URL links
+- [x] Inline code blocks
+- [ ] Inline image embedding
+
+### Possible future syntax
+
+- [ ] Superscript
+- [ ] Subscript
+- [ ] Code block language types
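
The README above describes a two-stage design: parse Markdown into an AST, then convert the AST to HTML. main.rb is listed in this diff (+21 lines) but not expanded, so the exact wiring is not shown; based on the file list and the classes that appear later in this diff, an end-to-end run presumably looks something like the following sketch (illustrative only):

```ruby
require './lib/rosetta/services/input_scanner'
require './lib/rosetta/abstract_syntax_tree'
require './lib/rosetta/formatters/html/html_writer'

# Illustrative pipeline only -- main.rb is not expanded in this diff, so the
# exact wiring is an assumption based on the class names shown below.
markdown = File.read('./samples/all.md')

tokens = InputScanner.call(markdown)      # source text -> flat token list
ast    = AbstractSyntaxTree.new(tokens)   # flat tokens -> token tree
html   = HTMLWriter.call(ast.token_tree)  # token tree -> HTML string

puts html
```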
data/TODOS.md
ADDED
@@ -0,0 +1,11 @@
+- [ ] Refactor to accept string input instead of a file
+- [x] Refactor Scanner into action
+- [x] Move HTML generation into formatter folder
+- [ ] General refactor
+- [ ] Gemify
+- [ ] Support inline nesting
+- [ ] Support block nesting
+- [ ] Revisit token matching
+- [ ] Implement visitor pattern for matching tokens?
+- [ ] Implement visitor pattern for consuming tokens?
+- [ ] Add support for escaping delimiters (eg \*)
data/lib/rosetta/abstract_syntax_tree.rb
ADDED
@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+
+require './lib/rosetta/services/tree_parser'
+
+# Holds all logic for creating the AST, including parsing.
+class AbstractSyntaxTree
+  attr_reader :base_tokens
+
+  def initialize(base_tokens)
+    @base_tokens = base_tokens
+  end
+
+  def token_tree
+    @token_tree ||= TreeParser.call(@base_tokens)
+  end
+
+  def formatted_token_tree
+    token_tree.map(&:to_s)
+  end
+
+  def node_representation
+    token_tree.map(&:node_representation)
+  end
+
+  def raw_tokens
+    @base_tokens.map(&:to_s)
+  end
+end
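
TreeParser.call(@base_tokens) above, and the ServiceBase subclasses later in this diff, all follow the service-object convention of invoking a class-level call. service_base.rb itself is listed in the file list (+8 lines) but not expanded here, so the following is only a guess at its shape:

```ruby
# Hypothetical reconstruction -- lib/rosetta/service_base.rb is not expanded
# in this diff, so this is an assumption based on how it is used.
class ServiceBase
  # Lets callers write SomeService.call(args) instead of
  # SomeService.new(args).call, which matches TreeParser.call(...) above.
  def self.call(*args)
    new(*args).call
  end
end
```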
data/lib/rosetta/formatters/html/html_generator.rb
ADDED
@@ -0,0 +1,96 @@
+# frozen_string_literal: true
+
+# Generates HTML for a given AST token.
+class HTMLGenerator
+  def initialize
+    define_header_generators
+  end
+
+  def generate(token)
+    token.accept(self)
+  end
+
+  def generate_basic_list(token)
+    generate_list(token.children, 'ul')
+  end
+
+  def generate_numbered_list(token)
+    generate_list(token.children, 'ol')
+  end
+
+  def generate_list_item(token)
+    "<li>#{token.value}</li>\n"
+  end
+
+  def generate_block_quote(token)
+    child_tokens = token.children.map { |child_token| generate(child_token) }
+
+    ["<pre><blockquote>\n", child_tokens, "</blockquote></pre>\n"].join
+  end
+
+  def generate_code_block(token)
+    child_tokens = token.children.map { |child_token| generate(child_token) }
+
+    ["<pre><code>\n", child_tokens, "</code></pre>\n"].compact.join
+  end
+
+  def generate_new_line(_token)
+    "\n"
+  end
+
+  def generate_quote(token)
+    "#{token.value}\n"
+  end
+
+  def generate_text(token)
+    token.value
+  end
+
+  def generate_break(_token)
+    "</br>\n"
+  end
+
+  def generate_paragraph(token)
+    child_tokens = token.children.map { |child_token| generate(child_token) }
+
+    ["<p>\n", child_tokens, "</p>\n"].join
+  end
+
+  def generate_strikethrough(token)
+    "<del>#{token.value}</del>"
+  end
+
+  def generate_italics(token)
+    "<em>#{token.value}</em>"
+  end
+
+  def generate_inline_code(token)
+    "<code>#{token.value}</code>"
+  end
+
+  def generate_bold(token)
+    "<b>#{token.value}</b>"
+  end
+
+  def generate_link(token)
+    "<a href='#{token.url}'>#{token.value}</a>"
+  end
+
+  private
+
+  def define_header_generators
+    (1..6).each do |header_level|
+      method_name = "generate_header_#{header_level}"
+
+      self.class.define_method(method_name) do |token|
+        "<h#{header_level}>#{token.value}</h#{header_level}>\n"
+      end
+    end
+  end
+
+  def generate_list(child_tokens, tag)
+    child_token_strings = child_tokens.map { |child_token| generate_list_item(child_token) }
+
+    ["<#{tag}>\n", child_token_strings, "</#{tag}>\n"].join
+  end
+end
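
HTMLGenerator#generate hands control back to the token via token.accept(self), so each token class is expected to call the generate_* method that matches its type (TODOS.md above already mentions moving towards a visitor pattern for this). The token classes are not expanded in this section of the diff, so the interface below is an assumption, shown for a single token type:

```ruby
# Hypothetical sketch of the accept interface HTMLGenerator relies on; the
# real token classes (e.g. data/lib/rosetta/tokens/bold.rb) are not expanded
# in this section of the diff.
class Bold
  attr_reader :value

  def initialize(value)
    @value = value
  end

  # Double dispatch: the generator calls token.accept(self), and the token
  # calls back into the generator method that knows how to render it.
  def accept(generator)
    generator.generate_bold(self)
  end
end
```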
data/lib/rosetta/formatters/html/html_writer.rb
ADDED
@@ -0,0 +1,19 @@
+# frozen_string_literal: true
+
+require './lib/rosetta/service_base'
+require './lib/rosetta/formatters/html/html_generator'
+
+# Turns a tree of tokens into it's corresponding HTML output.
+class HTMLWriter < ServiceBase
+  def initialize(token_tree)
+    @token_tree = token_tree
+  end
+
+  def call
+    @token_tree.map { |token| html_generator.generate(token) }.compact.join
+  end
+
+  def html_generator
+    @html_generator ||= HTMLGenerator.new
+  end
+end
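
HTMLWriter simply concatenates whatever HTMLGenerator returns for each top-level token in the tree. A minimal illustrative call, assuming the ServiceBase call convention and the accept dispatch sketched above:

```ruby
# Illustrative only: Text and NewLine appear later in this diff, but their
# accept implementations are not shown, so the exact output is an assumption.
HTMLWriter.call([Text.new('Hello, world'), NewLine.new])
# presumably => "Hello, world\n"
```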
data/lib/rosetta/services/inline_token_resolver.rb
ADDED
@@ -0,0 +1,80 @@
+# frozen_string_literal: true
+
+require './lib/rosetta/service_base'
+require './lib/rosetta/tokens/text'
+require './lib/rosetta/tokens/bold'
+require './lib/rosetta/tokens/italics'
+require './lib/rosetta/tokens/strikethrough'
+require './lib/rosetta/tokens/inline_code'
+
+# Matches and extracts tokens contained in text.
+class InlineTokenResolver < ServiceBase
+  def initialize(line)
+    @line = line
+    @tokens = []
+    @current_counter = 0
+    @base_counter = 0
+  end
+
+  def call
+    resolve_text_into_tokens until end_of_line?
+    consume_plain_text_up_to(@current_counter)
+
+    @tokens << NewLine.new
+    @tokens
+  end
+
+  private
+
+  def end_of_line?
+    @current_counter >= @line.length - 1
+  end
+
+  def resolve_text_into_tokens
+    matching_token_type = match_token_type
+
+    if matching_token_type.nil?
+      @current_counter += 1
+    else
+      handle_matching_token(matching_token_type)
+    end
+  end
+
+  def match_token_type
+    current_line = @line[@current_counter..]
+
+    inline_token_classes.find do |token_type|
+      token_type.matches?(current_line)
+    end
+  end
+
+  def handle_matching_token(token_type)
+    # We don't want to consume the token type delimiter.
+    consume_plain_text_up_to(@current_counter - 1) unless @current_counter.zero?
+    @tokens << consume(token_type)
+  end
+
+  def consume_plain_text_up_to(final_index)
+    plain_text = @line[@base_counter..final_index]
+    @base_counter = @current_counter
+    @tokens << Text.new(plain_text) unless plain_text.length.zero?
+  end
+
+  def inline_token_classes
+    @inline_token_classes ||= BasicToken::INLINE_CLASS_NAMES.map { |type| constantize_type(type) }
+  end
+
+  def consume(token_type)
+    token = token_type.consume(@line[@base_counter..])
+    length_consumed = token.source_text.length
+
+    @current_counter += length_consumed
+    @base_counter = @current_counter
+
+    token
+  end
+
+  def constantize_type(type)
+    Object.const_get(type)
+  end
+end
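
InlineTokenResolver leans on each inline token class exposing a class-level matches? and consume, plus an instance-level source_text whose length tells the resolver how far to advance its counters. Those token files aren't expanded in this section, so here is a minimal sketch of the assumed contract, using a made-up stand-in class rather than the gem's real ones:

```ruby
# Hypothetical stand-in showing the contract InlineTokenResolver assumes of
# inline token classes; the real Bold/Italics/InlineCode files are not
# expanded in this section of the diff.
class BacktickCodeExample
  DELIMITER = '`'

  attr_reader :value, :source_text

  # matches? receives the remainder of the line from the current counter.
  def self.matches?(text)
    text.start_with?(DELIMITER)
  end

  # consume receives the remainder of the line and must return a token whose
  # source_text covers everything it consumed, so the resolver can advance.
  def self.consume(text)
    value = text[1..].split(DELIMITER).first.to_s
    new(value, "#{DELIMITER}#{value}#{DELIMITER}")
  end

  def initialize(value, source_text)
    @value = value
    @source_text = source_text
  end
end
```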
data/lib/rosetta/services/input_scanner.rb
ADDED
@@ -0,0 +1,15 @@
+# frozen_string_literal: true
+
+require './lib/rosetta/service_base'
+require './lib/rosetta/services/token_resolver'
+
+# Scans and tokenises source text.
+class InputScanner < ServiceBase
+  def initialize(input)
+    @input = input
+  end
+
+  def call
+    @input.split("\n").map { |line| TokenResolver.call(line) }.flatten
+  end
+end
data/lib/rosetta/services/token_resolver.rb
ADDED
@@ -0,0 +1,42 @@
+# frozen_string_literal: true
+
+require './lib/rosetta/service_base'
+require './lib/rosetta/services/inline_token_resolver'
+require './lib/rosetta/tokens/basic_list_item'
+require './lib/rosetta/tokens/code_block_delimiter'
+require './lib/rosetta/tokens/header'
+require './lib/rosetta/tokens/line_break'
+require './lib/rosetta/tokens/link'
+require './lib/rosetta/tokens/numbered_list_item'
+require './lib/rosetta/tokens/quote'
+require './lib/rosetta/tokens/text'
+require './lib/rosetta/tokens/basic_token'
+
+# Resolves token types from source text.
+class TokenResolver < ServiceBase
+  def initialize(text)
+    @text = text
+  end
+
+  def call
+    top_level_token_classes.each do |type_class|
+      return type_class.new(@text) if type_class.matches?(@text)
+    end
+
+    resolve_text_block
+  end
+
+  private
+
+  def top_level_token_classes
+    BasicToken::TOP_LEVEL_CLASS_NAMES.map { |type| constantize_type(type) }
+  end
+
+  def resolve_text_block
+    InlineTokenResolver.call(@text)
+  end
+
+  def constantize_type(type)
+    Object.const_get(type)
+  end
+end
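
Both resolvers constantize class names from BasicToken::TOP_LEVEL_CLASS_NAMES and BasicToken::INLINE_CLASS_NAMES, but basic_token.rb (+51 lines) is not expanded in this diff. The constants presumably look something like the sketch below; the exact names and ordering are assumptions inferred from the require lists above and the generate_* methods in HTMLGenerator:

```ruby
# Hypothetical sketch only -- the real data/lib/rosetta/tokens/basic_token.rb
# is not expanded in this diff, so these lists are assumptions.
class BasicToken
  # Checked first, line by line, by TokenResolver#call.
  TOP_LEVEL_CLASS_NAMES = %w[
    Header
    BasicListItem
    NumberedListItem
    Quote
    CodeBlockDelimiter
    LineBreak
    Link
  ].freeze

  # Matched inside otherwise plain text by InlineTokenResolver.
  INLINE_CLASS_NAMES = %w[Bold Italics Strikethrough InlineCode].freeze
end
```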