rosetta-ruby 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +7 -0
  2. data/.DS_Store +0 -0
  3. data/.gitignore +58 -0
  4. data/.rubocop.yml +8 -0
  5. data/.vscode/settings.json +3 -0
  6. data/Gemfile +7 -0
  7. data/Gemfile.lock +16 -0
  8. data/README.md +48 -0
  9. data/TODOS.md +11 -0
  10. data/lib/rosetta/abstract_syntax_tree.rb +28 -0
  11. data/lib/rosetta/formatters/html/html_generator.rb +96 -0
  12. data/lib/rosetta/formatters/html/html_writer.rb +19 -0
  13. data/lib/rosetta/service_base.rb +8 -0
  14. data/lib/rosetta/services/inline_token_resolver.rb +80 -0
  15. data/lib/rosetta/services/input_scanner.rb +15 -0
  16. data/lib/rosetta/services/token_resolver.rb +42 -0
  17. data/lib/rosetta/services/tree_parser.rb +126 -0
  18. data/lib/rosetta/tokens/basic_list.rb +10 -0
  19. data/lib/rosetta/tokens/basic_list_item.rb +19 -0
  20. data/lib/rosetta/tokens/basic_token.rb +51 -0
  21. data/lib/rosetta/tokens/block_quote.rb +10 -0
  22. data/lib/rosetta/tokens/block_token.rb +36 -0
  23. data/lib/rosetta/tokens/bold.rb +21 -0
  24. data/lib/rosetta/tokens/break.rb +20 -0
  25. data/lib/rosetta/tokens/code_block.rb +10 -0
  26. data/lib/rosetta/tokens/code_block_delimiter.rb +18 -0
  27. data/lib/rosetta/tokens/header.rb +42 -0
  28. data/lib/rosetta/tokens/inline_code.rb +25 -0
  29. data/lib/rosetta/tokens/italics.rb +21 -0
  30. data/lib/rosetta/tokens/line_break.rb +18 -0
  31. data/lib/rosetta/tokens/link.rb +66 -0
  32. data/lib/rosetta/tokens/new_line.rb +20 -0
  33. data/lib/rosetta/tokens/numbered_list.rb +10 -0
  34. data/lib/rosetta/tokens/numbered_list_item.rb +25 -0
  35. data/lib/rosetta/tokens/paragraph.rb +10 -0
  36. data/lib/rosetta/tokens/quote.rb +24 -0
  37. data/lib/rosetta/tokens/shared/inline_tokens.rb +30 -0
  38. data/lib/rosetta/tokens/strikethrough.rb +29 -0
  39. data/lib/rosetta/tokens/text.rb +14 -0
  40. data/lib/rosetta-ruby.rb +15 -0
  41. data/main.rb +21 -0
  42. data/rosetta-ruby.gemspec +16 -0
  43. data/samples/all.md +50 -0
  44. data/samples/bold.md +1 -0
  45. data/samples/code_blocks.md +5 -0
  46. data/samples/headers.md +11 -0
  47. data/samples/link.md +3 -0
  48. data/samples/lists.md +10 -0
  49. data/samples/quotes.md +6 -0
  50. data/samples/text.md +5 -0
  51. metadata +91 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 48ee42bc64f9c983f926292355d3b43924f2ed9f46650218ce0f18b3cca357b5
4
+ data.tar.gz: 95d425b76360875deed5853a549b2f25cee0c3b96ce9b9a97187243564885c62
5
+ SHA512:
6
+ metadata.gz: 45e4186535f9b6a4093d6c90d751e188838ba75d3744bfa6b45d52bb20e66f15d1c2e98758f9c08cdc17e48cec06ede4bbe9d644ae815fa9a75ca377f07b2cec
7
+ data.tar.gz: 7d8b107a44460bfcf0dc67ddc24be074625ffa20a42e5e5755c06e583c58755b9fdb6125f50504660a66592846af8f91ad752966727c99d8eb4cdd596f4748a9
data/.DS_Store ADDED
Binary file
data/.gitignore ADDED
@@ -0,0 +1,58 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /spec/examples.txt
9
+ /test/tmp/
10
+ /test/version_tmp/
11
+ /tmp/
12
+
13
+ # Used by dotenv library to load environment variables.
14
+ # .env
15
+
16
+ # Ignore Byebug command history file.
17
+ .byebug_history
18
+
19
+ /.vscode/
20
+
21
+ ## Specific to RubyMotion:
22
+ .dat*
23
+ .repl_history
24
+ build/
25
+ *.bridgesupport
26
+ build-iPhoneOS/
27
+ build-iPhoneSimulator/
28
+
29
+ ## Specific to RubyMotion (use of CocoaPods):
30
+ #
31
+ # We recommend against adding the Pods directory to your .gitignore. However
32
+ # you should judge for yourself, the pros and cons are mentioned at:
33
+ # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
34
+ #
35
+ # vendor/Pods/
36
+
37
+ ## Documentation cache and generated files:
38
+ /.yardoc/
39
+ /_yardoc/
40
+ /doc/
41
+ /rdoc/
42
+
43
+ ## Environment normalization:
44
+ /.bundle/
45
+ /vendor/bundle
46
+ /lib/bundler/man/
47
+
48
+ # for a library or gem, you might want to ignore these files since the code is
49
+ # intended to run in multiple environments; otherwise, check them in:
50
+ # Gemfile.lock
51
+ # .ruby-version
52
+ # .ruby-gemset
53
+
54
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
55
+ .rvmrc
56
+
57
+ # Used by RuboCop. Remote config files pulled in from inherit_from directive.
58
+ # .rubocop-https?--*
data/.rubocop.yml ADDED
@@ -0,0 +1,8 @@
1
+ Lint/MissingSuper:
2
+ Enabled: false
3
+
4
+ Style/SymbolArray:
5
+ Enabled: false
6
+
7
+ Layout/LineLength:
8
+ Max: 100
@@ -0,0 +1,3 @@
1
+ {
2
+ "ruby.rubocop.configFilePath": ".rubocop.yml"
3
+ }
data/Gemfile ADDED
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
4
+ ruby '3.0.0'
5
+
6
+ gem 'byebug'
7
+ gem 'rspec'
data/Gemfile.lock ADDED
@@ -0,0 +1,16 @@
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ byebug (11.1.3)
5
+
6
+ PLATFORMS
7
+ x86_64-darwin-20
8
+
9
+ DEPENDENCIES
10
+ byebug
11
+
12
+ RUBY VERSION
13
+ ruby 3.0.0p0
14
+
15
+ BUNDLED WITH
16
+ 2.2.3
data/README.md ADDED
@@ -0,0 +1,48 @@
1
+ # rosetta
2
+
3
+ A Markdown to HTML translator.
4
+
5
+ **[April 15th 2021] This project is a work in progress.**
6
+
7
+ I've recently taken an interest in the syntax and semantics of programming languages, and would
8
+ like to explore some of the practices involved in writing one. Having said that, I don't want to
9
+ dive into writing a full-featured programming language for various reasons, the primary one being
10
+ that I don't have any use cases where an existing language could not trivially satisfy my programming
11
+ needs.
12
+
13
+ With that in mind I'm going to start smaller with a translator that takes a Markdown source file
14
+ and outputs HTML. In an ideal world I'd use it as part of another project, but regardless of whether
15
+ I get that far I'm going to enjoy digging into the challenges involved.
16
+
17
+ My initial plan is to try and parse Markdown source into an abstract syntax tree of some sort, and then convert
18
+ that AST into HTML. I'll start with a _subset_ of GitHub flavoured Markdown; I use GitHub's Markdown
19
+ on a near daily basis, and if I want to use this elsewhere I probably won't have need for a full
20
+ Markdown feature set.
21
+
22
+ ## Markdown syntax
23
+
24
+ Rosetta will support the following features. For examples of what these look like in use,
25
+ visit [the GitHub Markdown Guide](https://guides.github.com/features/mastering-markdown/).
26
+ I'll mark each item as completed once Rosetta can convert them to its AST representation.
27
+
28
+ ### Block level
29
+
30
+ - [x] Headers from 1-6
31
+ - [x] Numbered and bulleted lists, as well as sub lists
32
+ - [x] Quotes
33
+ - [x] Standalone code blocks
34
+ - [x] Block URL links
35
+ - [ ] Block image embedding
36
+
37
+ ### Inline
38
+
39
+ - [x] Bold, italic, strikethrough
40
+ - [x] Inline URL links
41
+ - [x] Inline code blocks
42
+ - [ ] Inline image embedding
43
+
44
+ ### Possible future syntax
45
+
46
+ - [ ] Superscript
47
+ - [ ] Subscript
48
+ - [ ] Code block language types
data/TODOS.md ADDED
@@ -0,0 +1,11 @@
1
+ - [ ] Refactor to accept string input instead of a file
2
+ - [x] Refactor Scanner into action
3
+ - [x] Move HTML generation into formatter folder
4
+ - [ ] General refactor
5
+ - [ ] Gemify
6
+ - [ ] Support inline nesting
7
+ - [ ] Support block nesting
8
+ - [ ] Revisit token matching
9
+ - [ ] Implement visitor pattern for matching tokens?
10
+ - [ ] Implement visitor pattern for consuming tokens?
11
+ - [ ] Add support for escaping delimiters (eg \*)
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require './lib/rosetta/services/tree_parser'
4
+
5
# Wraps the flat list of scanned tokens and exposes the tree parsed from them,
# plus a few mapped views of both the flat list and the tree.
class AbstractSyntaxTree
  # @return [Array] the flat tokens this object was constructed with
  attr_reader :base_tokens

  # @param base_tokens [Array] flat tokens to be parsed into a tree
  def initialize(base_tokens)
    @base_tokens = base_tokens
  end

  # Parses the base tokens with TreeParser; the result is memoised.
  #
  # @return [Object] whatever TreeParser.call returns for the base tokens
  def token_tree
    @token_tree ||= TreeParser.call(@base_tokens)
  end

  # @return [Array<String>] +to_s+ of every top-level node in the parsed tree
  def formatted_token_tree
    stringify(token_tree)
  end

  # @return [Array] +node_representation+ of every top-level node in the tree
  def node_representation
    token_tree.map(&:node_representation)
  end

  # @return [Array<String>] +to_s+ of every unparsed base token
  def raw_tokens
    stringify(@base_tokens)
  end

  private

  # Maps a collection of tokens to their string forms.
  def stringify(tokens)
    tokens.map(&:to_s)
  end
end
@@ -0,0 +1,96 @@
1
+ # frozen_string_literal: true
2
+
3
require 'cgi/escape'

# Generates HTML for a given AST token.
#
# {#generate} passes this generator to +token.accept+; presumably each token
# then calls back into the +generate_*+ method that matches its type.
class HTMLGenerator
  # Header levels that get a +generate_header_<level>+ method.
  HEADER_LEVELS = (1..6).freeze

  # Define generate_header_1 .. generate_header_6 once, when the class is
  # loaded, so they exist before any instance is created and are not
  # re-defined on every instantiation.
  HEADER_LEVELS.each do |level|
    define_method("generate_header_#{level}") do |token|
      "<h#{level}>#{token.value}</h#{level}>\n"
    end
  end

  # Entry point: renders +token+ by handing this generator to it.
  #
  # @param token [#accept] any token implementing the visitor hook
  # @return [Object] whatever +token.accept+ returns
  def generate(token)
    token.accept(self)
  end

  # @param token [#children] bulleted list token
  # @return [String] a +<ul>+ element with one +<li>+ per child
  def generate_basic_list(token)
    generate_list(token.children, 'ul')
  end

  # @param token [#children] numbered list token
  # @return [String] an +<ol>+ element with one +<li>+ per child
  def generate_numbered_list(token)
    generate_list(token.children, 'ol')
  end

  # @param token [#value] list item token
  # @return [String] a single +<li>+ line
  def generate_list_item(token)
    "<li>#{token.value}</li>\n"
  end

  # @param token [#children] block quote token
  # @return [String] the rendered children wrapped in +<pre><blockquote>+
  def generate_block_quote(token)
    wrap_children(token, "<pre><blockquote>\n", "</blockquote></pre>\n")
  end

  # @param token [#children] code block token
  # @return [String] the rendered children wrapped in +<pre><code>+
  def generate_code_block(token)
    wrap_children(token, "<pre><code>\n", "</code></pre>\n")
  end

  # @return [String] a bare newline
  def generate_new_line(_token)
    "\n"
  end

  # @param token [#value] quote line token
  # @return [String] the quote text followed by a newline
  def generate_quote(token)
    "#{token.value}\n"
  end

  # @param token [#value] plain text token
  # @return [Object] the token's value, unchanged
  def generate_text(token)
    token.value
  end

  # @return [String] a line break element (+<br>+; +</br>+ is not valid HTML)
  def generate_break(_token)
    "<br>\n"
  end

  # @param token [#children] paragraph token
  # @return [String] the rendered children wrapped in +<p>+
  def generate_paragraph(token)
    wrap_children(token, "<p>\n", "</p>\n")
  end

  # @param token [#value]
  # @return [String] the value wrapped in +<del>+
  def generate_strikethrough(token)
    "<del>#{token.value}</del>"
  end

  # @param token [#value]
  # @return [String] the value wrapped in +<em>+
  def generate_italics(token)
    "<em>#{token.value}</em>"
  end

  # @param token [#value]
  # @return [String] the value wrapped in +<code>+
  def generate_inline_code(token)
    "<code>#{token.value}</code>"
  end

  # @param token [#value]
  # @return [String] the value wrapped in +<b>+
  def generate_bold(token)
    "<b>#{token.value}</b>"
  end

  # Renders an anchor. The URL is HTML-escaped because it is placed inside a
  # single-quoted attribute; an unescaped quote would end the attribute early.
  # The link text is emitted as-is, like every other token value here.
  #
  # @param token [#url, #value] link token
  # @return [String] an +<a>+ element
  def generate_link(token)
    "<a href='#{CGI.escapeHTML(token.url.to_s)}'>#{token.value}</a>"
  end

  private

  # Renders each child through {#generate} and wraps the result.
  def wrap_children(token, opening, closing)
    rendered = token.children.map { |child_token| generate(child_token) }

    [opening, rendered, closing].join
  end

  # Renders each child directly as a list item inside +<tag>+.
  def generate_list(child_tokens, tag)
    items = child_tokens.map { |child_token| generate_list_item(child_token) }

    ["<#{tag}>\n", items, "</#{tag}>\n"].join
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ require './lib/rosetta/service_base'
4
+ require './lib/rosetta/formatters/html/html_generator'
5
+
6
# Turns a tree of tokens into its corresponding HTML output.
class HTMLWriter < ServiceBase
  # @param token_tree [Enumerable] top-level tokens to render, in order
  def initialize(token_tree)
    @token_tree = token_tree
  end

  # Renders every top-level token and concatenates the results.
  #
  # @return [String] the joined HTML; tokens that render to nil are skipped
  def call
    fragments = @token_tree.map { |token| html_generator.generate(token) }
    fragments.compact.join
  end

  # @return [HTMLGenerator] the generator shared by all tokens of this writer
  def html_generator
    @html_generator ||= HTMLGenerator.new
  end
end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
# Base Service class.
#
# Subclasses implement +initialize+ and an instance-level +call+; callers then
# use +SomeService.call(...)+ as a one-shot entry point.
class ServiceBase
  # Builds an instance and immediately runs it.
  #
  # Every argument is forwarded untouched to +new+: positional arguments,
  # keyword arguments and a block. Forwarding with +...+ matters on Ruby 3,
  # where a bare +*args+ splat would hand keywords to +initialize+ as a
  # positional Hash.
  #
  # @return [Object] whatever the instance's +call+ returns
  def self.call(...)
    new(...).call
  end
end
@@ -0,0 +1,80 @@
1
+ # frozen_string_literal: true
2
+
3
+ require './lib/rosetta/service_base'
4
+ require './lib/rosetta/tokens/text'
5
+ require './lib/rosetta/tokens/bold'
6
+ require './lib/rosetta/tokens/italics'
7
+ require './lib/rosetta/tokens/strikethrough'
8
+ require './lib/rosetta/tokens/inline_code'
9
+
10
# Matches and extracts the inline tokens contained in a single line of text.
#
# Two cursors are kept: @current_counter is the position being inspected and
# @base_counter marks the start of text that has not yet been emitted.
class InlineTokenResolver < ServiceBase
  # @param line [String] the source line to split into inline tokens
  def initialize(line)
    @line = line
    @tokens = []
    @current_counter = 0
    @base_counter = 0
  end

  # Walks the line, then emits any trailing plain text and a NewLine token.
  #
  # @return [Array] the collected tokens, always ending with a NewLine
  def call
    resolve_text_into_tokens until end_of_line?
    consume_plain_text_up_to(@current_counter)

    @tokens << NewLine.new
  end

  private

  # True once the cursor sits on (or beyond) the final character.
  def end_of_line?
    @current_counter + 1 >= @line.length
  end

  # Either consumes the token that starts at the cursor or advances one char.
  def resolve_text_into_tokens
    token_type = match_token_type
    return handle_matching_token(token_type) unless token_type.nil?

    @current_counter += 1
  end

  # @return [Class, nil] first inline token class matching the rest of the line
  def match_token_type
    remaining = @line[@current_counter..]

    inline_token_classes.find { |candidate| candidate.matches?(remaining) }
  end

  # Flushes plain text sitting before the token, then consumes the token.
  def handle_matching_token(token_type)
    # Stop one short of the cursor so the token's delimiter is not swallowed.
    consume_plain_text_up_to(@current_counter - 1) if @current_counter.positive?
    @tokens << consume(token_type)
  end

  # Emits @line[@base_counter..final_index] as a Text token (when non-empty)
  # and moves the base cursor up to the current cursor.
  def consume_plain_text_up_to(final_index)
    plain_text = @line[@base_counter..final_index]
    @base_counter = @current_counter
    return if plain_text.length.zero?

    @tokens << Text.new(plain_text)
  end

  # Inline token classes, resolved once from their names.
  def inline_token_classes
    @inline_token_classes ||= BasicToken::INLINE_CLASS_NAMES.map do |class_name|
      constantize_type(class_name)
    end
  end

  # Lets +token_type+ consume from the base cursor and advances both cursors
  # past the source text it reports having used.
  def consume(token_type)
    token_type.consume(@line[@base_counter..]).tap do |token|
      @current_counter += token.source_text.length
      @base_counter = @current_counter
    end
  end

  # Resolves a class name to the constant it names.
  def constantize_type(type)
    Object.const_get(type)
  end
end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ require './lib/rosetta/service_base'
4
+ require './lib/rosetta/services/token_resolver'
5
+
6
# Scans and tokenises source text.
class InputScanner < ServiceBase
  # @param input [String] the complete source text
  def initialize(input)
    @input = input
  end

  # Resolves every line of the input and returns the tokens as one flat list.
  #
  # TokenResolver.call yields either a single token or an array of tokens per
  # line, so +flat_map+ merges them in a single pass without building an
  # intermediate nested array.
  #
  # @return [Array] tokens for the whole input, in source order
  def call
    @input.split("\n").flat_map { |line| TokenResolver.call(line) }
  end
end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ require './lib/rosetta/service_base'
4
+ require './lib/rosetta/services/inline_token_resolver'
5
+ require './lib/rosetta/tokens/basic_list_item'
6
+ require './lib/rosetta/tokens/code_block_delimiter'
7
+ require './lib/rosetta/tokens/header'
8
+ require './lib/rosetta/tokens/line_break'
9
+ require './lib/rosetta/tokens/link'
10
+ require './lib/rosetta/tokens/numbered_list_item'
11
+ require './lib/rosetta/tokens/quote'
12
+ require './lib/rosetta/tokens/text'
13
+ require './lib/rosetta/tokens/basic_token'
14
+
15
# Resolves the token(s) that a piece of source text represents.
class TokenResolver < ServiceBase
  # @param text [String] the source text to classify
  def initialize(text)
    @text = text
  end

  # Builds the first top-level token type that matches the text; when none
  # matches, the text is handed to the inline resolver instead.
  #
  # @return [Object] a top-level token, or the result of InlineTokenResolver
  def call
    matching_class = top_level_token_classes.find { |candidate| candidate.matches?(@text) }

    matching_class ? matching_class.new(@text) : resolve_text_block
  end

  private

  # Top-level token classes, in the order they should be tried.
  def top_level_token_classes
    BasicToken::TOP_LEVEL_CLASS_NAMES.map { |class_name| constantize_type(class_name) }
  end

  # Treats the text as a plain block and resolves its inline tokens.
  def resolve_text_block
    InlineTokenResolver.call(@text)
  end

  # Resolves a class name to the constant it names.
  def constantize_type(type)
    Object.const_get(type)
  end
end
+ end