xrb 0.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +0 -0
- data/bake/xrb/entities.rb +60 -0
- data/bake/xrb/parsers.rb +69 -0
- data/ext/extconf.rb +21 -0
- data/ext/xrb/escape.c +152 -0
- data/ext/xrb/escape.h +15 -0
- data/ext/xrb/markup.c +1949 -0
- data/ext/xrb/markup.h +6 -0
- data/ext/xrb/markup.rl +226 -0
- data/ext/xrb/query.c +619 -0
- data/ext/xrb/query.h +6 -0
- data/ext/xrb/query.rl +82 -0
- data/ext/xrb/tag.c +204 -0
- data/ext/xrb/tag.h +21 -0
- data/ext/xrb/template.c +1114 -0
- data/ext/xrb/template.h +6 -0
- data/ext/xrb/template.rl +77 -0
- data/ext/xrb/xrb.c +72 -0
- data/ext/xrb/xrb.h +132 -0
- data/lib/xrb/buffer.rb +103 -0
- data/lib/xrb/builder.rb +222 -0
- data/lib/xrb/entities.rb +2137 -0
- data/lib/xrb/entities.xrb +30 -0
- data/lib/xrb/error.rb +81 -0
- data/lib/xrb/fallback/markup.rb +1658 -0
- data/lib/xrb/fallback/markup.rl +228 -0
- data/lib/xrb/fallback/query.rb +548 -0
- data/lib/xrb/fallback/query.rl +88 -0
- data/lib/xrb/fallback/template.rb +829 -0
- data/lib/xrb/fallback/template.rl +80 -0
- data/lib/xrb/markup.rb +56 -0
- data/lib/xrb/native.rb +15 -0
- data/lib/xrb/parse_delegate.rb +19 -0
- data/lib/xrb/parsers.rb +17 -0
- data/lib/xrb/query.rb +80 -0
- data/lib/xrb/reference.rb +108 -0
- data/lib/xrb/strings.rb +47 -0
- data/lib/xrb/tag.rb +115 -0
- data/lib/xrb/template.rb +164 -0
- data/lib/xrb/uri.rb +100 -0
- data/lib/xrb/version.rb +8 -0
- data/lib/xrb.rb +11 -0
- data/license.md +23 -0
- data/readme.md +29 -0
- data.tar.gz.sig +0 -0
- metadata +109 -58
- metadata.gz.sig +0 -0
- data/README +0 -60
- data/app/helpers/ui_helper.rb +0 -80
- data/app/models/xrb/element.rb +0 -9
- data/lib/xrb/engine.rb +0 -4
- data/rails/init.rb +0 -1
- data/xrb.gemspec +0 -12
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: ec15e8b31b37bcb608d2e1fb7e5748081e76872a8aa5c31b4ec6fc4d4e74122c
|
4
|
+
data.tar.gz: d57336048d69699d54a16016dd1bcaf3764372828baa41a3b7da59273cf50f18
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 7f8f906db2c28206e7892ce08775015d090785f29a6c095959a3de25e56d6a40ace9e7941613fc9158ce9bab588d1bfa6add5d0b72ab1f251174d7ef9de2cfce
|
7
|
+
data.tar.gz: fe3707d210322d99d85f1b75be28e9990a3a22addf1a56a675ba95fcb262fd1669185edeb5dd7bb3caec1995535f74fc9cb8efa6921e9476314080e89e244395
|
checksums.yaml.gz.sig
ADDED
Binary file
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Released under the MIT License.
|
4
|
+
# Copyright, 2017-2024, by Samuel Williams.
|
5
|
+
|
6
|
+
# Fetch the HTML5 entities from w3.org and update the local cache.
|
7
|
+
# @parameter force [Boolean] Whether to force regenerate the local cache.
|
8
|
+
def fetch_entities(force: false)
|
9
|
+
require 'json'
|
10
|
+
require 'async'
|
11
|
+
require 'async/http/internet'
|
12
|
+
|
13
|
+
internet = Async::HTTP::Internet.new
|
14
|
+
entites_json_path = self.entites_json_path
|
15
|
+
|
16
|
+
if force || !File.exist?(entites_json_path)
|
17
|
+
url = "https://www.w3.org/TR/html5/entities.json"
|
18
|
+
|
19
|
+
Sync do
|
20
|
+
File.write(entites_json_path, internet.get(url).read)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
return JSON.parse(File.read(entites_json_path)).delete_if{|string, _| !string.end_with? ';'}
|
25
|
+
end
|
26
|
+
|
27
|
+
# Consume the HTML5 entities and generate parsers to escape them.
|
28
|
+
# @parameter wet [Boolean] Whether to write updated files.
|
29
|
+
def update_entities(wet: false)
|
30
|
+
require 'xrb/template'
|
31
|
+
|
32
|
+
paths = {
|
33
|
+
# 'ext/xrb/entities.rl' => 'ext/xrb/entities.xrb',
|
34
|
+
'lib/xrb/entities.rb' => 'lib/xrb/entities.xrb',
|
35
|
+
}
|
36
|
+
|
37
|
+
entities = self.fetch_entities
|
38
|
+
|
39
|
+
paths.each do |output_path, template_path|
|
40
|
+
template_path = File.expand_path(template_path, context.root)
|
41
|
+
output_path = File.expand_path(output_path, context.root)
|
42
|
+
|
43
|
+
template = XRB::Template.load_file(template_path)
|
44
|
+
|
45
|
+
output = template.to_string(entities)
|
46
|
+
|
47
|
+
if wet
|
48
|
+
File.write(output_path, output)
|
49
|
+
else
|
50
|
+
puts "*** #{output_path} ***"
|
51
|
+
puts output
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
private
|
57
|
+
|
58
|
+
def entites_json_path
|
59
|
+
File.expand_path("entities.json", context.root)
|
60
|
+
end
|
data/bake/xrb/parsers.rb
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Released under the MIT License.
|
4
|
+
# Copyright, 2017-2024, by Samuel Williams.
|
5
|
+
|
6
|
+
# Generate the pure Ruby parsers.
|
7
|
+
def generate_fallback
|
8
|
+
Dir.chdir(fallback_directory) do
|
9
|
+
Dir.glob("*.rl").each do |parser_path|
|
10
|
+
system("ragel", "-I", parsers_directory, "-R", parser_path, "-F1")
|
11
|
+
end
|
12
|
+
|
13
|
+
# sh("ruby-beautify", "--tabs", "--overwrite", *Dir.glob("*.rb"))
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
# Generate the native C parsers.
|
18
|
+
def generate_native
|
19
|
+
Dir.chdir(native_directory) do
|
20
|
+
Dir.glob("*.rl").each do |parser_path|
|
21
|
+
system("ragel", "-I", parsers_directory, "-C", parser_path, "-G2")
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
# Compile the C extension.
|
27
|
+
def compile
|
28
|
+
system("rake", "compile", chdir: extensions_directory)
|
29
|
+
end
|
30
|
+
|
31
|
+
# Generate the parsers and compile them as required.
|
32
|
+
def generate
|
33
|
+
self.generate_native
|
34
|
+
self.generate_fallback
|
35
|
+
self.compile
|
36
|
+
end
|
37
|
+
|
38
|
+
# Generate a visualisation of the parsers.
|
39
|
+
def visualize_parsers
|
40
|
+
Dir.chdir(fallback_directory) do
|
41
|
+
Dir.glob("*.rl").each do |parser_path|
|
42
|
+
dot_path = parser_path + ".dot"
|
43
|
+
system("ragel", "-I", parsers_directory, "-Vp", parser_path, "-o", dot_path)
|
44
|
+
|
45
|
+
pdf_path = parser_path + ".pdf"
|
46
|
+
system("dot", "-Tpdf", "-o", pdf_path, dot_path)
|
47
|
+
|
48
|
+
system("open", pdf_path) rescue nil
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
private
|
54
|
+
|
55
|
+
def parsers_directory
|
56
|
+
File.expand_path("parsers", context.root)
|
57
|
+
end
|
58
|
+
|
59
|
+
def fallback_directory
|
60
|
+
File.expand_path("lib/xrb/fallback", context.root)
|
61
|
+
end
|
62
|
+
|
63
|
+
def extensions_directory
|
64
|
+
File.expand_path("ext", context.root)
|
65
|
+
end
|
66
|
+
|
67
|
+
def native_directory
|
68
|
+
File.expand_path("ext/xrb", context.root)
|
69
|
+
end
|
data/ext/extconf.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Released under the MIT License.
|
4
|
+
# Copyright, 2016-2024, by Samuel Williams.
|
5
|
+
|
6
|
+
return if RUBY_DESCRIPTION =~ /jruby/
|
7
|
+
|
8
|
+
require 'mkmf'
|
9
|
+
|
10
|
+
$CFLAGS << " -O3 -Wall -Wno-unknown-pragmas -std=c99"
|
11
|
+
|
12
|
+
have_func("rb_sym2str")
|
13
|
+
have_func("rb_str_cat_cstr")
|
14
|
+
have_func("rb_str_reserve")
|
15
|
+
|
16
|
+
$srcs = Dir.glob("xrb/*.c")
|
17
|
+
$VPATH << "$(srcdir)/xrb"
|
18
|
+
|
19
|
+
create_header
|
20
|
+
|
21
|
+
create_makefile('XRB_Extension')
|
data/ext/xrb/escape.c
ADDED
@@ -0,0 +1,152 @@
|
|
1
|
+
|
2
|
+
#include "escape.h"
|
3
|
+
#include <assert.h>
|
4
|
+
|
5
|
+
inline static int XRB_Markup_is_markup(VALUE value) {
|
6
|
+
if (RB_IMMEDIATE_P(value))
|
7
|
+
return 0;
|
8
|
+
|
9
|
+
// This is a short-cut:
|
10
|
+
if (rb_class_of(value) == rb_XRB_MarkupString) {
|
11
|
+
return 1;
|
12
|
+
}
|
13
|
+
|
14
|
+
return rb_funcall(value, id_is_a, 1, rb_XRB_Markup) == Qtrue;
|
15
|
+
}
|
16
|
+
|
17
|
+
VALUE XRB_MarkupString_raw(VALUE self, VALUE string) {
|
18
|
+
string = rb_str_dup(string);
|
19
|
+
|
20
|
+
rb_obj_reveal(string, rb_XRB_MarkupString);
|
21
|
+
|
22
|
+
return string;
|
23
|
+
}
|
24
|
+
|
25
|
+
// => [["<", 60, "3c"], [">", 62, "3e"], ["\"", 34, "22"], ["&", 38, "26"]]
|
26
|
+
// static const uint32_t MASK = 0x3e3e3e3e;
|
27
|
+
//
|
28
|
+
// static const uint32_t MASK_LT = 0x3c3c3c3c;
|
29
|
+
// static const uint32_t MASK_GT = 0x3e3e3e3e;
|
30
|
+
// static const uint32_t MASK_QUOT = 0x22222222;
|
31
|
+
// static const uint32_t MASK_AMP = 0x26262626;
|
32
|
+
|
33
|
+
static inline const char * XRB_Markup_index_symbol(const char * begin, const char * end) {
|
34
|
+
const char * p = begin;
|
35
|
+
|
36
|
+
while (p < end) {
|
37
|
+
// if ((end - p) >= 4) {
|
38
|
+
// // Do the next 4 characters contain anything we are interested in?
|
39
|
+
// if ((*(const uint32_t *)p) & MASK_LT) {
|
40
|
+
// p += 4;
|
41
|
+
//
|
42
|
+
// continue;
|
43
|
+
// }
|
44
|
+
// }
|
45
|
+
|
46
|
+
switch (*p) {
|
47
|
+
case '<':
|
48
|
+
case '>':
|
49
|
+
case '"':
|
50
|
+
case '&':
|
51
|
+
return p;
|
52
|
+
}
|
53
|
+
|
54
|
+
p += 1;
|
55
|
+
}
|
56
|
+
|
57
|
+
return end;
|
58
|
+
}
|
59
|
+
|
60
|
+
static inline void XRB_Markup_append_entity(const char * p, VALUE buffer) {
|
61
|
+
// What symbol are we looking at?
|
62
|
+
switch (*p) {
|
63
|
+
case '<':
|
64
|
+
rb_str_cat_cstr(buffer, "&lt;");
|
65
|
+
break;
|
66
|
+
case '>':
|
67
|
+
rb_str_cat_cstr(buffer, "&gt;");
|
68
|
+
break;
|
69
|
+
case '"':
|
70
|
+
rb_str_cat_cstr(buffer, "&quot;");
|
71
|
+
break;
|
72
|
+
case '&':
|
73
|
+
rb_str_cat_cstr(buffer, "&amp;");
|
74
|
+
break;
|
75
|
+
}
|
76
|
+
}
|
77
|
+
|
78
|
+
static inline VALUE XRB_Markup_append_buffer(VALUE buffer, const char * s, const char * p, const char * end) {
|
79
|
+
while (1) {
|
80
|
+
// Append the non-symbol part:
|
81
|
+
rb_str_buf_cat(buffer, s, p - s);
|
82
|
+
|
83
|
+
// Return once the end of the input has been reached (nothing further to escape):
|
84
|
+
if (p == end) return buffer;
|
85
|
+
|
86
|
+
XRB_Markup_append_entity(p, buffer);
|
87
|
+
|
88
|
+
s = p + 1;
|
89
|
+
p = XRB_Markup_index_symbol(s, end);
|
90
|
+
}
|
91
|
+
}
|
92
|
+
|
93
|
+
// Escape and append a string to the output buffer.
|
94
|
+
VALUE XRB_Markup_append_string(VALUE buffer, VALUE string) {
|
95
|
+
const char * begin = RSTRING_PTR(string);
|
96
|
+
const char * end = begin + RSTRING_LEN(string);
|
97
|
+
|
98
|
+
const char * s = begin;
|
99
|
+
|
100
|
+
// There are two outcomes, either p is at end, or p points to a symbol:
|
101
|
+
const char * p = XRB_Markup_index_symbol(s, end);
|
102
|
+
|
103
|
+
return XRB_Markup_append_buffer(buffer, s, p, end);
|
104
|
+
}
|
105
|
+
|
106
|
+
VALUE XRB_Markup_append(VALUE self, VALUE buffer, VALUE value) {
|
107
|
+
if (value == Qnil) return Qnil;
|
108
|
+
|
109
|
+
if (XRB_Markup_is_markup(value)) {
|
110
|
+
rb_str_append(buffer, value);
|
111
|
+
} else {
|
112
|
+
if (rb_type(value) != T_STRING) {
|
113
|
+
value = rb_funcall(value, id_to_s, 0);
|
114
|
+
}
|
115
|
+
|
116
|
+
XRB_Markup_append_string(buffer, value);
|
117
|
+
}
|
118
|
+
|
119
|
+
return buffer;
|
120
|
+
}
|
121
|
+
|
122
|
+
// Convert markup special characters to entities. May return the original string if no changes were made.
|
123
|
+
VALUE XRB_Markup_escape_string(VALUE self, VALUE string) {
|
124
|
+
const char * begin = RSTRING_PTR(string);
|
125
|
+
const char * end = begin + RSTRING_LEN(string);
|
126
|
+
|
127
|
+
const char * s = begin;
|
128
|
+
|
129
|
+
// There are two outcomes, either p is at end, or p points to a symbol:
|
130
|
+
const char * p = XRB_Markup_index_symbol(s, end);
|
131
|
+
|
132
|
+
// We escape early if there were no changes to be made:
|
133
|
+
if (p == end) return string;
|
134
|
+
|
135
|
+
return XRB_Markup_append_buffer(XRB_buffer_for(string), s, p, end);
|
136
|
+
}
|
137
|
+
|
138
|
+
void Init_xrb_escape() {
|
139
|
+
rb_XRB_MarkupString = rb_define_class_under(rb_XRB, "MarkupString", rb_cString);
|
140
|
+
rb_gc_register_mark_object(rb_XRB_MarkupString);
|
141
|
+
|
142
|
+
rb_include_module(rb_XRB_MarkupString, rb_XRB_Markup);
|
143
|
+
|
144
|
+
rb_undef_method(rb_class_of(rb_XRB_Markup), "escape_string");
|
145
|
+
rb_define_singleton_method(rb_XRB_Markup, "escape_string", XRB_Markup_escape_string, 1);
|
146
|
+
|
147
|
+
rb_undef_method(rb_class_of(rb_XRB_Markup), "append");
|
148
|
+
rb_define_singleton_method(rb_XRB_Markup, "append", XRB_Markup_append, 2);
|
149
|
+
|
150
|
+
rb_undef_method(rb_class_of(rb_XRB_Markup), "raw");
|
151
|
+
rb_define_singleton_method(rb_XRB_Markup, "raw", XRB_MarkupString_raw, 1);
|
152
|
+
}
|
data/ext/xrb/escape.h
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
|
2
|
+
#pragma once
|
3
|
+
|
4
|
+
#include "xrb.h"
|
5
|
+
|
6
|
+
void Init_xrb_escape();
|
7
|
+
|
8
|
+
// Given a string, replace its class with XRB::MarkupString so that it would be output as is.
|
9
|
+
VALUE XRB_MarkupString_raw(VALUE self, VALUE string);
|
10
|
+
|
11
|
+
// Append any value to the output buffer efficiently, escaping entities as needed.
|
12
|
+
VALUE XRB_Markup_append(VALUE self, VALUE buffer, VALUE value);
|
13
|
+
|
14
|
+
// Escape any entities in the given string. If no entities were found, might return the original string.
|
15
|
+
VALUE XRB_Markup_escape_string(VALUE self, VALUE string);
|