rosetta-ruby 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.DS_Store +0 -0
- data/.gitignore +58 -0
- data/.rubocop.yml +8 -0
- data/.vscode/settings.json +3 -0
- data/Gemfile +7 -0
- data/Gemfile.lock +16 -0
- data/README.md +48 -0
- data/TODOS.md +11 -0
- data/lib/rosetta/abstract_syntax_tree.rb +28 -0
- data/lib/rosetta/formatters/html/html_generator.rb +96 -0
- data/lib/rosetta/formatters/html/html_writer.rb +19 -0
- data/lib/rosetta/service_base.rb +8 -0
- data/lib/rosetta/services/inline_token_resolver.rb +80 -0
- data/lib/rosetta/services/input_scanner.rb +15 -0
- data/lib/rosetta/services/token_resolver.rb +42 -0
- data/lib/rosetta/services/tree_parser.rb +126 -0
- data/lib/rosetta/tokens/basic_list.rb +10 -0
- data/lib/rosetta/tokens/basic_list_item.rb +19 -0
- data/lib/rosetta/tokens/basic_token.rb +51 -0
- data/lib/rosetta/tokens/block_quote.rb +10 -0
- data/lib/rosetta/tokens/block_token.rb +36 -0
- data/lib/rosetta/tokens/bold.rb +21 -0
- data/lib/rosetta/tokens/break.rb +20 -0
- data/lib/rosetta/tokens/code_block.rb +10 -0
- data/lib/rosetta/tokens/code_block_delimiter.rb +18 -0
- data/lib/rosetta/tokens/header.rb +42 -0
- data/lib/rosetta/tokens/inline_code.rb +25 -0
- data/lib/rosetta/tokens/italics.rb +21 -0
- data/lib/rosetta/tokens/line_break.rb +18 -0
- data/lib/rosetta/tokens/link.rb +66 -0
- data/lib/rosetta/tokens/new_line.rb +20 -0
- data/lib/rosetta/tokens/numbered_list.rb +10 -0
- data/lib/rosetta/tokens/numbered_list_item.rb +25 -0
- data/lib/rosetta/tokens/paragraph.rb +10 -0
- data/lib/rosetta/tokens/quote.rb +24 -0
- data/lib/rosetta/tokens/shared/inline_tokens.rb +30 -0
- data/lib/rosetta/tokens/strikethrough.rb +29 -0
- data/lib/rosetta/tokens/text.rb +14 -0
- data/lib/rosetta-ruby.rb +15 -0
- data/main.rb +21 -0
- data/rosetta-ruby.gemspec +16 -0
- data/samples/all.md +50 -0
- data/samples/bold.md +1 -0
- data/samples/code_blocks.md +5 -0
- data/samples/headers.md +11 -0
- data/samples/link.md +3 -0
- data/samples/lists.md +10 -0
- data/samples/quotes.md +6 -0
- data/samples/text.md +5 -0
- metadata +91 -0
@@ -0,0 +1,126 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require './lib/rosetta/service_base'
|
4
|
+
require './lib/rosetta/tokens/basic_list_item'
|
5
|
+
require './lib/rosetta/tokens/code_block_delimiter'
|
6
|
+
require './lib/rosetta/tokens/code_block'
|
7
|
+
require './lib/rosetta/tokens/header'
|
8
|
+
require './lib/rosetta/tokens/break'
|
9
|
+
require './lib/rosetta/tokens/line_break'
|
10
|
+
require './lib/rosetta/tokens/link'
|
11
|
+
require './lib/rosetta/tokens/numbered_list_item'
|
12
|
+
require './lib/rosetta/tokens/paragraph'
|
13
|
+
require './lib/rosetta/tokens/block_quote'
|
14
|
+
require './lib/rosetta/tokens/quote'
|
15
|
+
require './lib/rosetta/tokens/text'
|
16
|
+
require './lib/rosetta/tokens/basic_token'
|
17
|
+
require './lib/rosetta/tokens/basic_list'
|
18
|
+
require './lib/rosetta/tokens/numbered_list'
|
19
|
+
require './lib/rosetta/tokens/block_token'
|
20
|
+
require './lib/rosetta/tokens/new_line'
|
21
|
+
|
22
|
+
# Groups a flat list of tokens into the top-level nodes of an AST.
#
# Runs of quote / list-item tokens are wrapped in their block class, tokens
# between code-block delimiters become a CodeBlock, runs of inline tokens
# become a Paragraph, and every other token is passed through unchanged.
class TreeParser < ServiceBase
  # Maps the type of a groupable token to the block class that wraps a run of
  # consecutive tokens of that type.
  BLOCK_TOKEN_TYPES = {
    QUOTE: BlockQuote,
    BASIC_LIST_ITEM: BasicList,
    NUMBERED_LIST_ITEM: NumberedList
  }.freeze

  # @param tokens [Array] tokens responding to #type and #inline?
  def initialize(tokens)
    @tokens = tokens
    @counter = 0
  end

  # Consumes every token and returns the resulting top-level nodes.
  #
  # TODO: Investigate if this can be done without declaring and mutating the array.
  #
  # @return [Array] parsed nodes in source order
  def call
    parsed_tree = []
    parsed_tree << consume_next_block until end_of_file?

    # A lone line break yields nil (see #match_break); drop those entries.
    parsed_tree.compact
  end

  private

  # Dispatches on the current token's type and returns the node built from it
  # (or nil when the token produces no node).
  def consume_next_block
    token_type = current_token.type

    if token_type == :CODE_BLOCK_DELIMITER || BLOCK_TOKEN_TYPES.key?(token_type)
      handle_generic_block_types(token_type)
    elsif token_type == :LINE_BREAK
      match_break
    elsif current_token.inline?
      match_paragraph
    else
      consume_current_token
    end
  end

  # Token at the cursor, or nil once the cursor is past the last token.
  def current_token
    @tokens[@counter]
  end

  # Token right after the cursor, or nil when the cursor is on the last token.
  def next_token
    @tokens[@counter + 1]
  end

  # Two consecutive line breaks collapse into a single Break node; a lone line
  # break is skipped and yields nil.
  def match_break
    # Safe navigation: a line break may be the very last token, in which case
    # there is no next token to inspect.
    if next_token&.type == :LINE_BREAK
      @counter += 2
      Break.new
    else
      @counter += 1
      # We don't want to insert newlines all the time, the desired
      # insertion is handled in paragraph grouping.
      nil
    end
  end

  # Builds a CodeBlock for a delimiter token, otherwise groups sibling tokens
  # of the given type into their block class.
  def handle_generic_block_types(token_type)
    return match_code_block if token_type == :CODE_BLOCK_DELIMITER

    group_siblings_into_block(token_type)
  end

  # Collects the run of consecutive tokens of +block_type+ starting at the
  # cursor and wraps them in the mapped block class.
  def group_siblings_into_block(block_type)
    block_class = BLOCK_TOKEN_TYPES[block_type]

    child_tokens = []
    child_tokens << consume_current_token while !end_of_file? && current_token.type == block_type

    block_class.new(child_tokens)
  end

  # Collects the run of inline and :NEW_LINE tokens starting at the cursor
  # into a Paragraph.
  def match_paragraph
    child_tokens = []

    while !end_of_file? && (current_token.inline? || current_token.type == :NEW_LINE)
      child_tokens << consume_current_token
    end

    Paragraph.new(child_tokens)
  end

  # Collects every token between the opening delimiter at the cursor and the
  # next delimiter (or the end of input when the block is never closed).
  def match_code_block
    @counter += 1 # skip the opening delimiter
    child_tokens = []

    child_tokens << consume_current_token while !end_of_file? && current_token.type != :CODE_BLOCK_DELIMITER

    @counter += 1 unless end_of_file? # skip the closing delimiter, if any
    CodeBlock.new(child_tokens)
  end

  # Returns the token at the cursor and advances the cursor by one.
  def consume_current_token
    token = current_token
    @counter += 1
    token
  end

  # True once the cursor has moved past the last token.
  def end_of_file?
    @counter >= @tokens.length
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require './lib/rosetta/tokens/basic_token'
|
4
|
+
|
5
|
+
# Handles logic for basic (unordered) list item tokens, i.e. lines that start
# with "* " or "- ".
class BasicListItem < BasicToken
  # @param text [String] a line of source text
  # @return [Boolean] whether the line starts with a list delimiter
  def self.matches?(text)
    text.start_with?('* ', '- ')
  end

  def type
    :BASIC_LIST_ITEM
  end

  # @return [String] the item text without its delimiter
  def value
    # If the code reaches here we know the first 2 chars are the list delimiter.
    @source_text[2..-1]
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'byebug'
|
4
|
+
|
5
|
+
# Holds logic and information for basic token management.
#
# Subclasses are expected to implement #type (a Symbol such as :INLINE_CODE)
# and #value; the base implementations raise.
class BasicToken
  TOP_LEVEL_CLASS_NAMES = %i[
    Header LineBreak Quote CodeBlockDelimiter BasicListItem NumberedListItem Link
  ].freeze

  INLINE_CLASS_NAMES = %i[Bold Italics Strikethrough InlineCode Link].freeze

  attr_reader :source_text

  # @param source_text [String] the raw text this token was built from
  def initialize(source_text)
    @source_text = source_text
  end

  # @return [String] debug representation including type and value
  def to_s
    "<Token type='#{type}' value='#{value}'>"
  end

  # @return [String] single-line representation used when printing a tree
  def node_representation
    "<#{type} value='#{value}'>"
  end

  # @raise [RuntimeError] always; subclasses must override
  def type
    raise 'Subclass should handle type.'
  end

  # @raise [RuntimeError] always; subclasses must override
  def value
    raise 'Subclass should handle value.'
  end

  # Dispatches to the visitor method named after this token's type,
  # e.g. :INLINE_CODE -> visitor.generate_inline_code(self).
  #
  # @param visitor [Object] object implementing the matching generate_* method
  # @return [Object] whatever the visitor method returns
  def accept(visitor)
    visitor.send(:"generate_#{type.to_s.downcase}", self)
  end

  # TODO: Make references to token type constant everywhere
  #
  # @return [Boolean] true for :TEXT and for types listed in INLINE_CLASS_NAMES
  def inline?
    INLINE_CLASS_NAMES.include?(camel_case_type) || type == :TEXT
  end

  private

  # Example: :INLINE_CODE -> :InlineCode
  def camel_case_type
    # String#capitalize already downcases everything after the first character.
    type.to_s.split('_').map(&:capitalize).join.to_sym
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Base class for tokens that wrap a list of child tokens.
#
# Subclasses are expected to implement #type; the base #type and #value raise.
class BlockToken
  attr_reader :children

  # @param tokens [Array] the child tokens wrapped by this block
  def initialize(tokens)
    @children = tokens
  end

  # @return [String] debug representation including the child count and children
  def to_s
    "<Token type='#{type}' child_count='#{children.count}' children='#{children}'>"
  end

  # Multi-line representation: an opening line, one line per child (indented
  # by one space), and a closing line.
  #
  # @return [String]
  def node_representation
    child_lines = children.map { |child| " #{child.node_representation}" }

    [
      "<#{type} child_count=#{children.count}>",
      child_lines.join("\n"),
      "<#{type} />"
    ].join("\n")
  end

  # @raise [RuntimeError] always; subclasses must override
  def type
    raise 'Subclass should handle #type.'
  end

  # @raise [RuntimeError] always; subclasses must override
  def value
    raise 'Subclass should handle #value.'
  end

  # Dispatches to the visitor method named after this block's type,
  # e.g. :PARAGRAPH -> visitor.generate_paragraph(self).
  #
  # @param visitor [Object] object implementing the matching generate_* method
  def accept(visitor)
    handler = "generate_#{type.to_s.downcase}".to_sym
    visitor.send(handler, self)
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require './lib/rosetta/tokens/basic_token'
|
4
|
+
require './lib/rosetta/tokens/shared/inline_tokens'
|
5
|
+
|
6
|
+
# Bold inline token; its delimiter character is '*'.
class Bold < BasicToken
  extend Shared::InlineTokens

  DELIMITER_TOKEN = '*'

  def type
    :BOLD
  end

  # @return [String] the source text with the delimiters stripped from both
  #   ends, or the two delimiter characters themselves when the source is
  #   nothing but that pair
  def value
    bare_pair = DELIMITER_TOKEN * 2
    return bare_pair if @source_text == bare_pair

    width = self.class.delimiter_length
    @source_text[width...-width]
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require './lib/rosetta/tokens/basic_token'
|
4
|
+
|
5
|
+
# Break token. It is built without any source text and carries no value.
class Break < BasicToken
  class << self
    # @param text [String] a line of source text
    # @return [Boolean] whether the line is empty
    def matches?(text)
      text == ''
    end
  end

  # Takes no arguments; no source text is stored for a break.
  def initialize; end

  def type
    :BREAK
  end

  # @return [nil] a break has no textual value
  def value
    nil
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require './lib/rosetta/tokens/basic_token'
|
4
|
+
|
5
|
+
# Token for a code block delimiter line (exactly three backticks).
class CodeBlockDelimiter < BasicToken
  class << self
    # @param text [String] a line of source text
    # @return [Boolean] whether the line is exactly the delimiter
    def matches?(text)
      text == '```'
    end
  end

  def type
    :CODE_BLOCK_DELIMITER
  end

  # @return [nil] a delimiter has no textual value
  def value
    nil
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require './lib/rosetta/tokens/basic_token'
|
4
|
+
|
5
|
+
# Handles logic for Header tokens: lines made of one or more '#' characters,
# a space, and the header text (e.g. "## Title").
class Header < BasicToken
  # One or more leading '#' characters immediately followed by a space.
  PREFIX_PATTERN = /\A#+ /

  # @param text [String] a line of source text
  # @return [Boolean] whether the line starts with a header prefix
  def self.matches?(text)
    # A line consisting only of '#' characters (no space) is not a header.
    # Always returns a strict boolean.
    text.match?(PREFIX_PATTERN)
  end

  # @param source_text [String] the raw header line
  def initialize(source_text)
    super(source_text)
    @depth = determine_depth
  end

  # @return [Symbol] :HEADER_<depth>, e.g. :HEADER_2 for "## Title"
  def type
    :"HEADER_#{@depth}"
  end

  # @return [String] the header text without its '#' prefix (memoized)
  def value
    @value ||= extract_value_from_text
  end

  private

  # Strips the leading run of '#' characters and the single space after it.
  def extract_value_from_text
    @source_text.delete_prefix("#{'#' * @depth} ")
  end

  # Counts the leading '#' characters of the source text.
  def determine_depth
    @source_text[/\A#*/].length
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require './lib/rosetta/tokens/basic_token'
|
4
|
+
require './lib/rosetta/tokens/shared/inline_tokens'
|
5
|
+
|
6
|
+
# Inline code token; its delimiter character is a single backtick.
class InlineCode < BasicToken
  extend Shared::InlineTokens

  DELIMITER_TOKEN = '`'

  # Defined directly on the class, so it takes precedence over any
  # delimiter_length provided by the extended module.
  #
  # @return [Integer] number of characters in the delimiter
  def self.delimiter_length
    DELIMITER_TOKEN.length
  end

  def type
    :INLINE_CODE
  end

  # @return [String] the source text with the delimiters stripped from both
  #   ends, or the two delimiter characters themselves when the source is
  #   nothing but that pair
  def value
    bare_pair = DELIMITER_TOKEN * 2
    return bare_pair if @source_text == bare_pair

    width = self.class.delimiter_length
    @source_text[width...-width]
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require './lib/rosetta/tokens/basic_token'
|
4
|
+
require './lib/rosetta/tokens/shared/inline_tokens'
|
5
|
+
|
6
|
+
# Italics inline token; its delimiter character is '_'.
class Italics < BasicToken
  extend Shared::InlineTokens

  DELIMITER_TOKEN = '_'

  def type
    :ITALICS
  end

  # @return [String] the source text with the delimiters stripped from both
  #   ends, or the two delimiter characters themselves when the source is
  #   nothing but that pair
  def value
    bare_pair = DELIMITER_TOKEN * 2
    return bare_pair if @source_text == bare_pair

    width = self.class.delimiter_length
    @source_text[width...-width]
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require './lib/rosetta/tokens/basic_token'
|
4
|
+
|
5
|
+
# Line break token, matching an empty line of source text.
class LineBreak < BasicToken
  class << self
    # @param text [String] a line of source text
    # @return [Boolean] whether the line is empty
    def matches?(text)
      text == ''
    end
  end

  def type
    :LINE_BREAK
  end

  # @return [nil] a line break has no textual value
  def value
    nil
  end
end
|