rosetta-ruby 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +7 -0
  2. data/.DS_Store +0 -0
  3. data/.gitignore +58 -0
  4. data/.rubocop.yml +8 -0
  5. data/.vscode/settings.json +3 -0
  6. data/Gemfile +7 -0
  7. data/Gemfile.lock +16 -0
  8. data/README.md +48 -0
  9. data/TODOS.md +11 -0
  10. data/lib/rosetta/abstract_syntax_tree.rb +28 -0
  11. data/lib/rosetta/formatters/html/html_generator.rb +96 -0
  12. data/lib/rosetta/formatters/html/html_writer.rb +19 -0
  13. data/lib/rosetta/service_base.rb +8 -0
  14. data/lib/rosetta/services/inline_token_resolver.rb +80 -0
  15. data/lib/rosetta/services/input_scanner.rb +15 -0
  16. data/lib/rosetta/services/token_resolver.rb +42 -0
  17. data/lib/rosetta/services/tree_parser.rb +126 -0
  18. data/lib/rosetta/tokens/basic_list.rb +10 -0
  19. data/lib/rosetta/tokens/basic_list_item.rb +19 -0
  20. data/lib/rosetta/tokens/basic_token.rb +51 -0
  21. data/lib/rosetta/tokens/block_quote.rb +10 -0
  22. data/lib/rosetta/tokens/block_token.rb +36 -0
  23. data/lib/rosetta/tokens/bold.rb +21 -0
  24. data/lib/rosetta/tokens/break.rb +20 -0
  25. data/lib/rosetta/tokens/code_block.rb +10 -0
  26. data/lib/rosetta/tokens/code_block_delimiter.rb +18 -0
  27. data/lib/rosetta/tokens/header.rb +42 -0
  28. data/lib/rosetta/tokens/inline_code.rb +25 -0
  29. data/lib/rosetta/tokens/italics.rb +21 -0
  30. data/lib/rosetta/tokens/line_break.rb +18 -0
  31. data/lib/rosetta/tokens/link.rb +66 -0
  32. data/lib/rosetta/tokens/new_line.rb +20 -0
  33. data/lib/rosetta/tokens/numbered_list.rb +10 -0
  34. data/lib/rosetta/tokens/numbered_list_item.rb +25 -0
  35. data/lib/rosetta/tokens/paragraph.rb +10 -0
  36. data/lib/rosetta/tokens/quote.rb +24 -0
  37. data/lib/rosetta/tokens/shared/inline_tokens.rb +30 -0
  38. data/lib/rosetta/tokens/strikethrough.rb +29 -0
  39. data/lib/rosetta/tokens/text.rb +14 -0
  40. data/lib/rosetta-ruby.rb +15 -0
  41. data/main.rb +21 -0
  42. data/rosetta-ruby.gemspec +16 -0
  43. data/samples/all.md +50 -0
  44. data/samples/bold.md +1 -0
  45. data/samples/code_blocks.md +5 -0
  46. data/samples/headers.md +11 -0
  47. data/samples/link.md +3 -0
  48. data/samples/lists.md +10 -0
  49. data/samples/quotes.md +6 -0
  50. data/samples/text.md +5 -0
  51. metadata +91 -0
@@ -0,0 +1,126 @@
1
+ # frozen_string_literal: true
2
+
3
+ require './lib/rosetta/service_base'
4
+ require './lib/rosetta/tokens/basic_list_item'
5
+ require './lib/rosetta/tokens/code_block_delimiter'
6
+ require './lib/rosetta/tokens/code_block'
7
+ require './lib/rosetta/tokens/header'
8
+ require './lib/rosetta/tokens/break'
9
+ require './lib/rosetta/tokens/line_break'
10
+ require './lib/rosetta/tokens/link'
11
+ require './lib/rosetta/tokens/numbered_list_item'
12
+ require './lib/rosetta/tokens/paragraph'
13
+ require './lib/rosetta/tokens/block_quote'
14
+ require './lib/rosetta/tokens/quote'
15
+ require './lib/rosetta/tokens/text'
16
+ require './lib/rosetta/tokens/basic_token'
17
+ require './lib/rosetta/tokens/basic_list'
18
+ require './lib/rosetta/tokens/numbered_list'
19
+ require './lib/rosetta/tokens/block_token'
20
+ require './lib/rosetta/tokens/new_line'
21
+
22
# Parse tokens into AST structure.
#
# Walks a flat token list once and returns an array in which sibling tokens
# are grouped into block tokens (quotes, lists, paragraphs, code blocks).
class TreeParser < ServiceBase
  # Child token type => block class used to group consecutive siblings.
  BLOCK_TOKEN_TYPES = {
    QUOTE: BlockQuote,
    BASIC_LIST_ITEM: BasicList,
    NUMBERED_LIST_ITEM: NumberedList
  }.freeze

  # @param tokens [Array] tokens in document order; each is expected to
  #   respond to #type and #inline?
  def initialize(tokens)
    @tokens = tokens
    @counter = 0
  end

  # Consumes every token and returns the parsed tree.
  #
  # TODO: Investigate if this can be done without declaring and mutating the array.
  #
  # @return [Array] block tokens and ungrouped tokens in document order
  def call
    parsed_tree = []
    parsed_tree << consume_next_block until end_of_file?

    # A lone line break yields nil (see #match_break); drop those entries.
    parsed_tree.compact
  end

  private

  # Dispatches on the current token and consumes one whole block.
  def consume_next_block
    token_type = current_token.type

    if token_type == :CODE_BLOCK_DELIMITER || BLOCK_TOKEN_TYPES.key?(token_type)
      handle_generic_block_types(token_type)
    elsif token_type == :LINE_BREAK
      match_break
    elsif current_token.inline?
      match_paragraph
    else
      consume_current_token
    end
  end

  def current_token
    @tokens[@counter]
  end

  # @return [Object, nil] the token after the current one, nil at the end
  def next_token
    @tokens[@counter + 1]
  end

  # Two consecutive line breaks become a Break; a single one is skipped.
  def match_break
    # next_token is nil when the line break is the final token, hence `&.`.
    if next_token&.type == :LINE_BREAK
      @counter += 2
      Break.new
    else
      @counter += 1
      # We don't want to insert newlines all the time, the desired
      # insertion is handled in paragraph grouping.
      nil
    end
  end

  def handle_generic_block_types(token_type)
    return match_code_block if token_type == :CODE_BLOCK_DELIMITER

    group_siblings_into_block(token_type)
  end

  # Collects the run of consecutive tokens of +block_type+ into its block class.
  def group_siblings_into_block(block_type)
    block_class = BLOCK_TOKEN_TYPES[block_type]

    child_tokens = []
    child_tokens << consume_current_token while !end_of_file? && current_token.type == block_type

    block_class.new(child_tokens)
  end

  # Collects consecutive inline and :NEW_LINE tokens into a Paragraph.
  def match_paragraph
    child_tokens = []

    while !end_of_file? && (current_token.inline? || current_token.type == :NEW_LINE)
      child_tokens << consume_current_token
    end

    Paragraph.new(child_tokens)
  end

  # Collects everything up to the closing delimiter (or end of input) into a
  # CodeBlock. Neither delimiter is kept as a child.
  def match_code_block
    @counter += 1 # skip the opening delimiter
    child_tokens = []

    child_tokens << consume_current_token while !end_of_file? && current_token.type != :CODE_BLOCK_DELIMITER

    @counter += 1 unless end_of_file? # skip the closing delimiter when present
    CodeBlock.new(child_tokens)
  end

  # Returns the current token and advances the cursor past it.
  def consume_current_token
    token = current_token
    @counter += 1
    token
  end

  def end_of_file?
    @counter >= @tokens.length
  end
end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ require './lib/rosetta/tokens/block_token'
4
+
5
+ # Block token for a basic list; its children are the list's item tokens.
6
+ class BasicList < BlockToken
7
+ def type
8
+ :BASIC_LIST
9
+ end
10
+ end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ require './lib/rosetta/tokens/basic_token'
4
+
5
# Token for a single basic list item (a line opening with "* " or "- ").
class BasicListItem < BasicToken
  # @param text [#start_with?] a piece of source text
  # @return [Boolean] true when the text opens with a basic list delimiter
  def self.matches?(text)
    text.start_with?('* ', '- ')
  end

  def type
    :BASIC_LIST_ITEM
  end

  # @return [String] the item text without its delimiter
  def value
    # A token of this class is expected to have matched, so the first two
    # characters are the list delimiter.
    @source_text.slice(2..-1)
  end
end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'byebug'
4
+
5
# Base class for tokens built from one piece of source text.
#
# Subclasses supply #type and #value; this class provides the string
# representations, visitor dispatch and the inline check on top of them.
class BasicToken
  TOP_LEVEL_CLASS_NAMES = %i[
    Header LineBreak Quote CodeBlockDelimiter BasicListItem NumberedListItem Link
  ].freeze

  INLINE_CLASS_NAMES = %i[Bold Italics Strikethrough InlineCode Link].freeze

  attr_reader :source_text

  # @param source_text [String] the raw text this token was created from
  def initialize(source_text)
    @source_text = source_text
  end

  def to_s
    "<Token type='#{type}' value='#{value}'>"
  end

  def node_representation
    "<#{type} value='#{value}'>"
  end

  # @raise [RuntimeError] unless overridden by the subclass
  def type
    raise 'Subclass should handle type.'
  end

  # @raise [RuntimeError] unless overridden by the subclass
  def value
    raise 'Subclass should handle value.'
  end

  # Calls the visitor method named after this token's type,
  # e.g. :INLINE_CODE dispatches to generate_inline_code(token).
  def accept(visitor)
    visitor.send(:"generate_#{type.to_s.downcase}", self)
  end

  # TODO: Make references to token type constant everywhere
  #
  # @return [Boolean] true for :TEXT and for every type listed in INLINE_CLASS_NAMES
  def inline?
    return true if INLINE_CLASS_NAMES.include?(camel_case_type)

    type == :TEXT
  end

  private

  # Example: :INLINE_CODE -> :InlineCode
  def camel_case_type
    words = type.to_s.downcase.split('_')
    words.map(&:capitalize).join.to_sym
  end
end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ require './lib/rosetta/tokens/block_token'
4
+
5
+ # Block token for a block quote; its children are the grouped quote tokens.
6
+ class BlockQuote < BlockToken
7
+ def type
8
+ :BLOCK_QUOTE
9
+ end
10
+ end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
# Base class for tokens that wrap a list of child tokens.
#
# Subclasses supply #type; this class provides the string representations
# and visitor dispatch.
class BlockToken
  attr_reader :children

  # @param tokens [Array] the child tokens held by this block
  def initialize(tokens)
    @children = tokens
  end

  def to_s
    "<Token type='#{type}' child_count='#{children.count}' children='#{children}'>"
  end

  # Multi-line representation: an opening tag, one line per child, and a
  # closing tag. With no children the middle line is empty.
  def node_representation
    header = "<#{type} child_count=#{children.count}>"
    footer = "<#{type} />"
    child_lines = children.map { |child| " #{child.node_representation}" }

    [header, child_lines.join("\n"), footer].join("\n")
  end

  # @raise [RuntimeError] unless overridden by the subclass
  def type
    raise 'Subclass should handle #type.'
  end

  # @raise [RuntimeError] unless overridden by the subclass
  def value
    raise 'Subclass should handle #value.'
  end

  # Calls the visitor method named after this token's type,
  # e.g. :BLOCK_QUOTE dispatches to generate_block_quote(token).
  def accept(visitor)
    visitor.send(:"generate_#{type.to_s.downcase}", self)
  end
end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ require './lib/rosetta/tokens/basic_token'
4
+ require './lib/rosetta/tokens/shared/inline_tokens'
5
+
6
# Token for bold inline text.
class Bold < BasicToken
  extend Shared::InlineTokens

  DELIMITER_TOKEN = '*'

  def type
    :BOLD
  end

  # Returns the source text with the delimiter stripped from both ends.
  # Source text that is exactly two delimiter characters is returned unchanged.
  def value
    bare_delimiters = DELIMITER_TOKEN * 2
    return bare_delimiters if @source_text == bare_delimiters

    # NOTE(review): delimiter_length is not defined in this class; presumably
    # it comes from Shared::InlineTokens — confirm.
    width = self.class.delimiter_length
    @source_text[width...-width]
  end
end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require './lib/rosetta/tokens/basic_token'
4
+
5
# Token for a break; it carries no source text and no value.
class Break < BasicToken
  class << self
    # @return [Boolean] true when the text is the empty string
    def matches?(text)
      text == ''
    end
  end

  # Takes no source text, so the parent initializer is intentionally not called.
  def initialize; end

  def type
    :BREAK
  end

  # @return [nil] a break has no value
  def value
    nil
  end
end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ require './lib/rosetta/tokens/block_token'
4
+
5
+ # Block token for a code block; its children are the tokens inside the block.
6
+ class CodeBlock < BlockToken
7
+ def type
8
+ :CODE_BLOCK
9
+ end
10
+ end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ require './lib/rosetta/tokens/basic_token'
4
+
5
# Token for a code block delimiter line (three backticks).
class CodeBlockDelimiter < BasicToken
  class << self
    # @return [Boolean] true when the text is exactly three backticks
    def matches?(text)
      text == '```'
    end
  end

  def type
    :CODE_BLOCK_DELIMITER
  end

  # @return [nil] a delimiter has no value
  def value
    nil
  end
end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ require './lib/rosetta/tokens/basic_token'
4
+
5
# Token for a header line: one or more '#' characters, a space, then the title.
class Header < BasicToken
  # Always returns a strict boolean. Text made only of '#' characters has no
  # space after the hashes and is therefore not a header.
  #
  # @param text [String] a piece of source text
  # @return [Boolean] true when the text starts with one or more '#' and a space
  def self.matches?(text)
    text.match?(/\A#+ /)
  end

  # @param source_text [String] the raw header line, e.g. "## Title"
  def initialize(source_text)
    super(source_text)
    @depth = determine_depth
  end

  # @return [Symbol] :HEADER_<depth>, e.g. :HEADER_2 for "## Title"
  def type
    "HEADER_#{@depth}".to_sym
  end

  # @return [String] the header text without the leading hashes and space
  def value
    @value ||= extract_value_from_text
  end

  private

  def extract_value_from_text
    prefix = "#{'#' * @depth} "
    @source_text.delete_prefix(prefix)
  end

  # Counts the leading '#' characters of the source text.
  def determine_depth
    @source_text[/\A#*/].length
  end
end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ require './lib/rosetta/tokens/basic_token'
4
+ require './lib/rosetta/tokens/shared/inline_tokens'
5
+
6
# Token for inline code wrapped in single backticks.
class InlineCode < BasicToken
  extend Shared::InlineTokens

  DELIMITER_TOKEN = '`'

  # @return [Integer] number of characters in the delimiter
  def self.delimiter_length
    DELIMITER_TOKEN.length
  end

  def type
    :INLINE_CODE
  end

  # Returns the source text with the delimiter stripped from both ends.
  # Source text that is exactly two delimiter characters is returned unchanged.
  def value
    bare_delimiters = DELIMITER_TOKEN * 2
    return bare_delimiters if @source_text == bare_delimiters

    width = self.class.delimiter_length
    @source_text[width...-width]
  end
end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ require './lib/rosetta/tokens/basic_token'
4
+ require './lib/rosetta/tokens/shared/inline_tokens'
5
+
6
# Token for italic inline text.
class Italics < BasicToken
  extend Shared::InlineTokens

  DELIMITER_TOKEN = '_'

  def type
    :ITALICS
  end

  # Returns the source text with the delimiter stripped from both ends.
  # Source text that is exactly two delimiter characters is returned unchanged.
  def value
    bare_delimiters = DELIMITER_TOKEN * 2
    return bare_delimiters if @source_text == bare_delimiters

    # NOTE(review): delimiter_length is not defined in this class; presumably
    # it comes from Shared::InlineTokens — confirm.
    width = self.class.delimiter_length
    @source_text[width...-width]
  end
end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ require './lib/rosetta/tokens/basic_token'
4
+
5
# Token produced for text that is the empty string.
class LineBreak < BasicToken
  class << self
    # @return [Boolean] true when the text is the empty string
    def matches?(text)
      text == ''
    end
  end

  def type
    :LINE_BREAK
  end

  # @return [nil] a line break has no value
  def value
    nil
  end
end