yarp 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (82) hide show
  1. checksums.yaml +7 -0
  2. data/CODE_OF_CONDUCT.md +76 -0
  3. data/CONTRIBUTING.md +51 -0
  4. data/LICENSE.md +7 -0
  5. data/Makefile.in +79 -0
  6. data/README.md +86 -0
  7. data/config.h.in +25 -0
  8. data/config.yml +2147 -0
  9. data/configure +4487 -0
  10. data/docs/build_system.md +85 -0
  11. data/docs/building.md +26 -0
  12. data/docs/configuration.md +56 -0
  13. data/docs/design.md +53 -0
  14. data/docs/encoding.md +116 -0
  15. data/docs/extension.md +20 -0
  16. data/docs/fuzzing.md +93 -0
  17. data/docs/heredocs.md +36 -0
  18. data/docs/mapping.md +117 -0
  19. data/docs/ripper.md +36 -0
  20. data/docs/serialization.md +130 -0
  21. data/docs/testing.md +55 -0
  22. data/ext/yarp/api_node.c +3680 -0
  23. data/ext/yarp/api_pack.c +256 -0
  24. data/ext/yarp/extconf.rb +131 -0
  25. data/ext/yarp/extension.c +547 -0
  26. data/ext/yarp/extension.h +18 -0
  27. data/include/yarp/ast.h +1412 -0
  28. data/include/yarp/defines.h +54 -0
  29. data/include/yarp/diagnostic.h +24 -0
  30. data/include/yarp/enc/yp_encoding.h +94 -0
  31. data/include/yarp/node.h +36 -0
  32. data/include/yarp/pack.h +141 -0
  33. data/include/yarp/parser.h +389 -0
  34. data/include/yarp/regexp.h +19 -0
  35. data/include/yarp/unescape.h +42 -0
  36. data/include/yarp/util/yp_buffer.h +39 -0
  37. data/include/yarp/util/yp_char.h +75 -0
  38. data/include/yarp/util/yp_constant_pool.h +64 -0
  39. data/include/yarp/util/yp_list.h +67 -0
  40. data/include/yarp/util/yp_memchr.h +14 -0
  41. data/include/yarp/util/yp_newline_list.h +54 -0
  42. data/include/yarp/util/yp_state_stack.h +24 -0
  43. data/include/yarp/util/yp_string.h +57 -0
  44. data/include/yarp/util/yp_string_list.h +28 -0
  45. data/include/yarp/util/yp_strpbrk.h +29 -0
  46. data/include/yarp/version.h +5 -0
  47. data/include/yarp.h +69 -0
  48. data/lib/yarp/lex_compat.rb +759 -0
  49. data/lib/yarp/node.rb +7428 -0
  50. data/lib/yarp/pack.rb +185 -0
  51. data/lib/yarp/ripper_compat.rb +174 -0
  52. data/lib/yarp/serialize.rb +389 -0
  53. data/lib/yarp.rb +330 -0
  54. data/src/diagnostic.c +25 -0
  55. data/src/enc/yp_big5.c +79 -0
  56. data/src/enc/yp_euc_jp.c +85 -0
  57. data/src/enc/yp_gbk.c +88 -0
  58. data/src/enc/yp_shift_jis.c +83 -0
  59. data/src/enc/yp_tables.c +509 -0
  60. data/src/enc/yp_unicode.c +2320 -0
  61. data/src/enc/yp_windows_31j.c +83 -0
  62. data/src/node.c +2011 -0
  63. data/src/pack.c +493 -0
  64. data/src/prettyprint.c +1782 -0
  65. data/src/regexp.c +580 -0
  66. data/src/serialize.c +1576 -0
  67. data/src/token_type.c +347 -0
  68. data/src/unescape.c +576 -0
  69. data/src/util/yp_buffer.c +78 -0
  70. data/src/util/yp_char.c +229 -0
  71. data/src/util/yp_constant_pool.c +147 -0
  72. data/src/util/yp_list.c +50 -0
  73. data/src/util/yp_memchr.c +31 -0
  74. data/src/util/yp_newline_list.c +119 -0
  75. data/src/util/yp_state_stack.c +25 -0
  76. data/src/util/yp_string.c +207 -0
  77. data/src/util/yp_string_list.c +32 -0
  78. data/src/util/yp_strncasecmp.c +20 -0
  79. data/src/util/yp_strpbrk.c +66 -0
  80. data/src/yarp.c +13211 -0
  81. data/yarp.gemspec +100 -0
  82. metadata +125 -0
data/lib/yarp.rb ADDED
@@ -0,0 +1,330 @@
1
+ # frozen_string_literal: true
2
+
3
+ module YARP
4
# Pairs a parsed Ruby source string with its list of offsets. Locations use it
# to resolve line numbers, columns, and byte ranges of the source.
class Source
  attr_reader :source, :offsets

  def initialize(source, offsets)
    @source, @offsets = source, offsets
  end

  # Returns +length+ bytes of the source, starting at byte +offset+.
  def slice(offset, length)
    @source.byteslice(offset, length)
  end

  # Returns the index of the first entry in +offsets+ that is greater than
  # +value+, or the number of offsets when no entry is greater.
  def line(value)
    found = @offsets.bsearch_index { |line_start| line_start > value }
    found.nil? ? @offsets.length : found
  end

  # Returns the distance in bytes between +value+ and the offset stored for
  # the line that contains it.
  def column(value)
    line_start = @offsets[line(value) - 1]
    value - line_start
  end
end
27
+
28
# This represents a location in the source.
class Location
  # A Source object that is used to determine more information from the given
  # offset and length.
  private attr_reader :source

  # The byte offset from the beginning of the source where this location
  # starts.
  attr_reader :start_offset

  # The length of this location in bytes.
  attr_reader :length

  def initialize(source, start_offset, length)
    @source = source
    @start_offset = start_offset
    @length = length
  end

  def inspect
    "#<YARP::Location @start_offset=#{@start_offset} @length=#{@length}>"
  end

  # The source code that this location represents.
  def slice
    source.slice(start_offset, length)
  end

  # The byte offset from the beginning of the source where this location ends.
  def end_offset
    start_offset + length
  end

  # The line number where this location starts.
  def start_line
    source.line(start_offset)
  end

  # The line number where this location ends. The lookup uses the last byte
  # inside the location (end_offset - 1), not the byte after it.
  def end_line
    source.line(end_offset - 1)
  end

  # The column number in bytes where this location starts from the start of
  # the line.
  def start_column
    source.column(start_offset)
  end

  # The column number in bytes where this location ends from the start of the
  # line. Like end_line, this is computed from the last byte inside the
  # location (end_offset - 1).
  def end_column
    source.column(end_offset - 1)
  end

  # Supports hash pattern matching. The requested keys are ignored; both
  # offsets are always returned.
  def deconstruct_keys(keys)
    { start_offset: start_offset, end_offset: end_offset }
  end

  def pretty_print(q)
    q.text("(#{start_offset}...#{end_offset})")
  end

  # Two locations are equal when they cover the same byte range; the source
  # they are attached to is not compared.
  def ==(other)
    other.is_a?(Location) &&
      other.start_offset == start_offset &&
      other.end_offset == end_offset
  end

  # Builds a placeholder location with offset 0 and length 0 that is not
  # attached to any source. The constructor takes three arguments, so the
  # source has to be passed explicitly (as nil); calling this with only two
  # arguments raised ArgumentError. Because there is no source, slice and the
  # line/column lookups cannot be used on the returned location.
  def self.null
    new(nil, 0, 0)
  end
end
101
+
102
# A comment found while parsing: its type together with where it appears.
class Comment
  attr_reader :type, :location

  def initialize(type, location)
    @type, @location = type, location
  end

  # Supports hash pattern matching. The requested keys are ignored; both
  # fields are always returned.
  def deconstruct_keys(keys)
    fields = {}
    fields[:type] = type
    fields[:location] = location
    fields
  end
end
115
+
116
# An error found while parsing: its message together with where it occurred.
class ParseError
  attr_reader :message, :location

  def initialize(message, location)
    @message, @location = message, location
  end

  # Supports hash pattern matching. The requested keys are ignored; both
  # fields are always returned.
  def deconstruct_keys(keys)
    fields = {}
    fields[:message] = message
    fields[:location] = location
    fields
  end
end
129
+
130
# A warning found while parsing: its message together with where it occurred.
class ParseWarning
  attr_reader :message, :location

  def initialize(message, location)
    @message, @location = message, location
  end

  # Supports hash pattern matching. The requested keys are ignored; both
  # fields are always returned.
  def deconstruct_keys(keys)
    fields = {}
    fields[:message] = message
    fields[:location] = location
    fields
  end
end
143
+
144
# A class that knows how to walk down the tree. It defines none of the
# individual visit methods, so consumers must implement each one they need.
# For a default implementation that continues walking the tree, see the
# Visitor class.
class BasicVisitor
  # Dispatches to the node's accept method; a nil node is skipped and nil is
  # returned.
  def visit(node)
    node.accept(self) unless node.nil?
  end

  # Visits every node in the collection and returns the results in order.
  def visit_all(nodes)
    nodes.map { |child| visit(child) }
  end

  # Visits each of the node's children and returns the results in order.
  def visit_child_nodes(node)
    children = node.child_nodes
    visit_all(children)
  end
end
161
+
162
# The visitor referred to as the default tree-walking implementation. This
# declaration adds no methods of its own on top of BasicVisitor.
class Visitor < BasicVisitor; end
164
+
165
# This represents the result of a call to ::parse or ::parse_file. It contains
# the AST, any comments that were encountered, and any errors and warnings
# that were encountered.
class ParseResult
  attr_reader :value, :comments, :errors, :warnings, :source

  def initialize(value, comments, errors, warnings, source)
    @value, @comments, @errors = value, comments, errors
    @warnings, @source = warnings, source
  end

  # Supports hash pattern matching. The requested keys are ignored, and the
  # source is not part of the returned hash.
  def deconstruct_keys(keys)
    { value: value, comments: comments, errors: errors, warnings: warnings }
  end

  # True when parsing produced no errors.
  def success?
    errors.empty?
  end

  # True when parsing produced at least one error.
  def failure?
    !success?
  end

  # Keep in sync with Java MarkNewlinesVisitor
  class MarkNewlinesVisitor < YARP::Visitor
    def initialize(newline_marked)
      @newline_marked = newline_marked
    end

    # Blocks get a fresh, all-false marker array for the duration of the
    # visit; the enclosing array is restored afterwards, even on error.
    def visit_block_node(node)
      enclosing_marks = @newline_marked
      @newline_marked = Array.new(enclosing_marks.size, false)
      begin
        super(node)
      ensure
        @newline_marked = enclosing_marks
      end
    end
    alias_method :visit_lambda_node, :visit_block_node

    # Flags the conditional itself before descending into it.
    def visit_if_node(node)
      node.set_newline_flag(@newline_marked)
      super(node)
    end
    alias_method :visit_unless_node, :visit_if_node

    # Flags every statement in the body before descending into the node.
    def visit_statements_node(node)
      node.body.each { |statement| statement.set_newline_flag(@newline_marked) }
      super(node)
    end
  end
  private_constant :MarkNewlinesVisitor

  # Walks the AST with a MarkNewlinesVisitor, using a marker array with one
  # more slot than there are source offsets, and returns the AST.
  def mark_newlines
    marks = Array.new(@source.offsets.size + 1, false)
    value.accept(MarkNewlinesVisitor.new(marks))
    value
  end
end
230
+
231
# This represents a token from the Ruby source.
class Token
  attr_reader :type, :value, :location

  def initialize(type, value, location)
    @type, @value, @location = type, value, location
  end

  # Supports hash pattern matching. The requested keys are ignored; all
  # three fields are always returned.
  def deconstruct_keys(keys)
    { type: type, value: value, location: location }
  end

  # Prints the token type, its location, and then the value in parentheses.
  def pretty_print(q)
    q.group do
      q.text(type.to_s)
      location.pretty_print(q)
      q.text("(")
      q.nest(2) do
        q.breakable("")
        q.pp(value)
      end
      q.breakable("")
      q.text(")")
    end
  end

  # Tokens are equal when their type and value match; the location is not
  # compared.
  def ==(other)
    return false unless other.is_a?(Token)

    other.type == type && other.value == value
  end
end
265
+
266
# This represents a node in the tree.
class Node
  attr_reader :location

  # Whether this node has been flagged by set_newline_flag.
  def newline?
    !!@newline
  end

  # Flags this node unless its starting line is already set in
  # +newline_marked+. When it flags the node, it also records the line in
  # +newline_marked+.
  def set_newline_flag(newline_marked)
    line = location.start_line
    return if newline_marked[line]

    newline_marked[line] = true
    @newline = true
  end

  # Prints the unqualified class name, the location, an optional "[Li:N]"
  # marker for flagged nodes, and then every field other than the location.
  def pretty_print(q)
    q.group do
      q.text(self.class.name.split("::").last)
      location.pretty_print(q)
      q.text("[Li:#{location.start_line}]") if newline?
      q.text("(")
      q.nest(2) do
        fields = deconstruct_keys([])
        fields.delete(:location)

        q.breakable("")
        q.seplist(fields, lambda { q.comma_breakable }, :each_value) { |field| q.pp(field) }
      end
      q.breakable("")
      q.text(")")
    end
  end
end
300
+
301
# Load the serialized AST using the source as a reference into a tree. The
# work is delegated to Serialize.load and its result is returned unchanged.
def self.load(source, serialized)
  tree = Serialize.load(source, serialized)
  tree
end
305
+
306
# This module is used for testing and debugging and is not meant to be used by
# consumers of this library.
module Debug
  # Parses the source and returns the offsets recorded on the result's source.
  def self.newlines(source)
    result = YARP.parse(source)
    result.source.offsets
  end

  # Builds the packed metadata string (the filepath's byte length as an "L"
  # field, the binary filepath, then a trailing "L" field of 0) and hands it
  # to parse_serialize_file_metadata along with the filepath.
  # NOTE(review): the meaning of the trailing 0 is not visible here; confirm
  # against parse_serialize_file_metadata.
  def self.parse_serialize_file(filepath)
    metadata = [filepath.bytesize, filepath.b, 0].pack("LA*L")
    parse_serialize_file_metadata(filepath, metadata)
  end
end

# Marking this as private so that consumers don't see it. It makes it a little
# annoying for testing since you have to const_get it to access the methods,
# but at least this way it's clear it's not meant for consumers.
private_constant :Debug
322
+ end
323
+
324
+ require_relative "yarp/lex_compat"
325
+ require_relative "yarp/node"
326
+ require_relative "yarp/ripper_compat"
327
+ require_relative "yarp/serialize"
328
+ require_relative "yarp/pack"
329
+
330
+ require "yarp/yarp"
data/src/diagnostic.c ADDED
@@ -0,0 +1,25 @@
1
+ #include "yarp/diagnostic.h"
2
+
3
+ // Append an error to the given list of diagnostic.
4
+ bool
5
+ yp_diagnostic_list_append(yp_list_t *list, const char *start, const char *end, const char *message) {
6
+ yp_diagnostic_t *diagnostic = (yp_diagnostic_t *) malloc(sizeof(yp_diagnostic_t));
7
+ if (diagnostic == NULL) return false;
8
+
9
+ *diagnostic = (yp_diagnostic_t) { .start = start, .end = end, .message = message };
10
+ yp_list_append(list, (yp_list_node_t *) diagnostic);
11
+ return true;
12
+ }
13
+
14
+ // Deallocate the internal state of the given diagnostic list.
15
+ void
16
+ yp_diagnostic_list_free(yp_list_t *list) {
17
+ yp_list_node_t *node, *next;
18
+
19
+ for (node = list->head; node != NULL; node = next) {
20
+ next = node->next;
21
+
22
+ yp_diagnostic_t *diagnostic = (yp_diagnostic_t *) node;
23
+ free(diagnostic);
24
+ }
25
+ }
data/src/enc/yp_big5.c ADDED
@@ -0,0 +1,79 @@
1
+ #include "yarp/enc/yp_encoding.h"
2
+
3
typedef uint16_t yp_big5_codepoint_t;

// Decode the Big5 character at c, where n is the number of bytes available.
// On success *width is set to 1 or 2 and the character's value is returned
// (two-byte characters as lead << 8 | trail). If the bytes do not form a
// character, *width is set to 0 and 0 is returned.
static yp_big5_codepoint_t
yp_big5_codepoint(const char *c, ptrdiff_t n, size_t *width) {
    const unsigned char *bytes = (const unsigned char *) c;
    const unsigned char lead = bytes[0];

    // Single byte characters.
    if (lead < 0x80) {
        *width = 1;
        return lead;
    }

    // Double byte characters; the trail byte is read only when available.
    if (n > 1 && lead >= 0xA1 && lead <= 0xFE) {
        const unsigned char trail = bytes[1];

        if (trail >= 0x40 && trail <= 0xFE) {
            *width = 2;
            return (yp_big5_codepoint_t) (lead << 8 | trail);
        }
    }

    *width = 0;
    return 0;
}
24
+
25
// Return the byte width of the Big5 character at c (1 or 2), or 0 if the
// bytes do not form a character within the n bytes available.
static size_t
yp_encoding_big5_char_width(const char *c, ptrdiff_t n) {
    size_t byte_count;

    // Only the width is needed; the decoded value is discarded.
    (void) yp_big5_codepoint(c, n, &byte_count);
    return byte_count;
}
32
+
33
+ static size_t
34
+ yp_encoding_big5_alpha_char(const char *c, ptrdiff_t n) {
35
+ size_t width;
36
+ yp_big5_codepoint_t codepoint = yp_big5_codepoint(c, n, &width);
37
+
38
+ if (width == 1) {
39
+ const char value = (const char) codepoint;
40
+ return yp_encoding_ascii_alpha_char(&value, n);
41
+ } else {
42
+ return 0;
43
+ }
44
+ }
45
+
46
+ static size_t
47
+ yp_encoding_big5_alnum_char(const char *c, ptrdiff_t n) {
48
+ size_t width;
49
+ yp_big5_codepoint_t codepoint = yp_big5_codepoint(c, n, &width);
50
+
51
+ if (width == 1) {
52
+ const char value = (const char) codepoint;
53
+ return yp_encoding_ascii_alnum_char(&value, n);
54
+ } else {
55
+ return 0;
56
+ }
57
+ }
58
+
59
+ static bool
60
+ yp_encoding_big5_isupper_char(const char *c, ptrdiff_t n) {
61
+ size_t width;
62
+ yp_big5_codepoint_t codepoint = yp_big5_codepoint(c, n, &width);
63
+
64
+ if (width == 1) {
65
+ const char value = (const char) codepoint;
66
+ return yp_encoding_ascii_isupper_char(&value, n);
67
+ } else {
68
+ return false;
69
+ }
70
+ }
71
+
72
+ yp_encoding_t yp_encoding_big5 = {
73
+ .name = "big5",
74
+ .char_width = yp_encoding_big5_char_width,
75
+ .alnum_char = yp_encoding_big5_alnum_char,
76
+ .alpha_char = yp_encoding_big5_alpha_char,
77
+ .isupper_char = yp_encoding_big5_isupper_char,
78
+ .multibyte = true
79
+ };
data/src/enc/yp_euc_jp.c ADDED
@@ -0,0 +1,85 @@
1
+ #include "yarp/enc/yp_encoding.h"
2
+
3
typedef uint16_t yp_euc_jp_codepoint_t;

// Decode the EUC-JP character at c, where n is the number of bytes available.
// On success *width is set to 1 or 2 and the character's value is returned
// (two-byte characters as lead << 8 | trail). If the bytes do not form a
// character, *width is set to 0 and 0 is returned.
static yp_euc_jp_codepoint_t
yp_euc_jp_codepoint(const char *c, ptrdiff_t n, size_t *width) {
    const unsigned char *bytes = (const unsigned char *) c;
    const unsigned char lead = bytes[0];

    // Single byte characters.
    if (lead < 0x80) {
        *width = 1;
        return lead;
    }

    // Double byte characters: the lead byte is either 0x8E or in 0xA1..0xFE,
    // and in both cases the trail byte must be in 0xA1..0xFE.
    if (n > 1) {
        const unsigned char trail = bytes[1];
        const int lead_matches = (lead == 0x8E) || (lead >= 0xA1 && lead <= 0xFE);

        if (lead_matches && trail >= 0xA1 && trail <= 0xFE) {
            *width = 2;
            return (yp_euc_jp_codepoint_t) (lead << 8 | trail);
        }
    }

    *width = 0;
    return 0;
}
30
+
31
// Return the byte width of the EUC-JP character at c (1 or 2), or 0 if the
// bytes do not form a character within the n bytes available.
static size_t
yp_encoding_euc_jp_char_width(const char *c, ptrdiff_t n) {
    size_t byte_count;

    // Only the width is needed; the decoded value is discarded.
    (void) yp_euc_jp_codepoint(c, n, &byte_count);
    return byte_count;
}
38
+
39
+ static size_t
40
+ yp_encoding_euc_jp_alpha_char(const char *c, ptrdiff_t n) {
41
+ size_t width;
42
+ yp_euc_jp_codepoint_t codepoint = yp_euc_jp_codepoint(c, n, &width);
43
+
44
+ if (width == 1) {
45
+ const char value = (const char) codepoint;
46
+ return yp_encoding_ascii_alpha_char(&value, n);
47
+ } else {
48
+ return 0;
49
+ }
50
+ }
51
+
52
+ static size_t
53
+ yp_encoding_euc_jp_alnum_char(const char *c, ptrdiff_t n) {
54
+ size_t width;
55
+ yp_euc_jp_codepoint_t codepoint = yp_euc_jp_codepoint(c, n, &width);
56
+
57
+ if (width == 1) {
58
+ const char value = (const char) codepoint;
59
+ return yp_encoding_ascii_alnum_char(&value, n);
60
+ } else {
61
+ return 0;
62
+ }
63
+ }
64
+
65
+ static bool
66
+ yp_encoding_euc_jp_isupper_char(const char *c, ptrdiff_t n) {
67
+ size_t width;
68
+ yp_euc_jp_codepoint_t codepoint = yp_euc_jp_codepoint(c, n, &width);
69
+
70
+ if (width == 1) {
71
+ const char value = (const char) codepoint;
72
+ return yp_encoding_ascii_isupper_char(&value, n);
73
+ } else {
74
+ return 0;
75
+ }
76
+ }
77
+
78
+ yp_encoding_t yp_encoding_euc_jp = {
79
+ .name = "euc-jp",
80
+ .char_width = yp_encoding_euc_jp_char_width,
81
+ .alnum_char = yp_encoding_euc_jp_alnum_char,
82
+ .alpha_char = yp_encoding_euc_jp_alpha_char,
83
+ .isupper_char = yp_encoding_euc_jp_isupper_char,
84
+ .multibyte = true
85
+ };
data/src/enc/yp_gbk.c ADDED
@@ -0,0 +1,88 @@
1
+ #include "yarp/enc/yp_encoding.h"
2
+
3
typedef uint16_t yp_gbk_codepoint_t;

// Decode the GBK character at c, where n is the number of bytes available.
// On success *width is set to 1 or 2 and the character's value is returned
// (two-byte characters as lead << 8 | trail). If the bytes do not form a
// character, *width is set to 0 and 0 is returned.
static yp_gbk_codepoint_t
yp_gbk_codepoint(const char *c, ptrdiff_t n, size_t *width) {
    const unsigned char *bytes = (const unsigned char *) c;
    const unsigned char lead = bytes[0];

    // Single byte characters.
    if (lead < 0x80) {
        *width = 1;
        return lead;
    }

    // Double byte characters; the trail byte is read only when available.
    if (n > 1) {
        const unsigned char trail = bytes[1];

        // The three trail byte ranges shared by the GBK regions below.
        const int trail_upper = (trail >= 0xA1 && trail <= 0xFE);
        const int trail_any = (trail >= 0x40 && trail <= 0xFE && trail != 0x7F);
        const int trail_lower = (trail >= 0x40 && trail <= 0xA0 && trail != 0x7F);

        const int is_double_byte =
            ((lead >= 0xA1 && lead <= 0xA9) && trail_upper) || // GBK/1
            ((lead >= 0xB0 && lead <= 0xF7) && trail_upper) || // GBK/2
            ((lead >= 0x81 && lead <= 0xA0) && trail_any) ||   // GBK/3
            ((lead >= 0xAA && lead <= 0xFE) && trail_lower) || // GBK/4
            ((lead >= 0xA8 && lead <= 0xA9) && trail_lower);   // GBK/5

        if (is_double_byte) {
            *width = 2;
            return (yp_gbk_codepoint_t) (lead << 8 | trail);
        }
    }

    *width = 0;
    return 0;
}
33
+
34
// Return the byte width of the GBK character at c (1 or 2), or 0 if the
// bytes do not form a character within the n bytes available.
static size_t
yp_encoding_gbk_char_width(const char *c, ptrdiff_t n) {
    size_t byte_count;

    // Only the width is needed; the decoded value is discarded.
    (void) yp_gbk_codepoint(c, n, &byte_count);
    return byte_count;
}
41
+
42
+ static size_t
43
+ yp_encoding_gbk_alpha_char(const char *c, ptrdiff_t n) {
44
+ size_t width;
45
+ yp_gbk_codepoint_t codepoint = yp_gbk_codepoint(c, n, &width);
46
+
47
+ if (width == 1) {
48
+ const char value = (const char) codepoint;
49
+ return yp_encoding_ascii_alpha_char(&value, n);
50
+ } else {
51
+ return 0;
52
+ }
53
+ }
54
+
55
+ static size_t
56
+ yp_encoding_gbk_alnum_char(const char *c, ptrdiff_t n) {
57
+ size_t width;
58
+ yp_gbk_codepoint_t codepoint = yp_gbk_codepoint(c, n, &width);
59
+
60
+ if (width == 1) {
61
+ const char value = (const char) codepoint;
62
+ return yp_encoding_ascii_alnum_char(&value, n);
63
+ } else {
64
+ return 0;
65
+ }
66
+ }
67
+
68
+ static bool
69
+ yp_encoding_gbk_isupper_char(const char *c, ptrdiff_t n) {
70
+ size_t width;
71
+ yp_gbk_codepoint_t codepoint = yp_gbk_codepoint(c, n, &width);
72
+
73
+ if (width == 1) {
74
+ const char value = (const char) codepoint;
75
+ return yp_encoding_ascii_isupper_char(&value, n);
76
+ } else {
77
+ return false;
78
+ }
79
+ }
80
+
81
+ yp_encoding_t yp_encoding_gbk = {
82
+ .name = "gbk",
83
+ .char_width = yp_encoding_gbk_char_width,
84
+ .alnum_char = yp_encoding_gbk_alnum_char,
85
+ .alpha_char = yp_encoding_gbk_alpha_char,
86
+ .isupper_char = yp_encoding_gbk_isupper_char,
87
+ .multibyte = true
88
+ };
data/src/enc/yp_shift_jis.c ADDED
@@ -0,0 +1,83 @@
1
+ #include "yarp/enc/yp_encoding.h"
2
+
3
typedef uint16_t yp_shift_jis_codepoint_t;

// Decode the Shift_JIS character at c, where n is the number of bytes
// available. On success *width is set to 1 or 2 and the character's value is
// returned (two-byte characters as lead << 8 | trail). If the bytes do not
// form a character, *width is set to 0 and 0 is returned.
static yp_shift_jis_codepoint_t
yp_shift_jis_codepoint(const char *c, ptrdiff_t n, size_t *width) {
    const unsigned char *bytes = (const unsigned char *) c;
    const unsigned char lead = bytes[0];

    // Single byte characters: below 0x80, plus the 0xA1..0xDF range.
    if (lead < 0x80 || (lead >= 0xA1 && lead <= 0xDF)) {
        *width = 1;
        return lead;
    }

    // Double byte characters; the trail byte is read only when available.
    if (n > 1 && ((lead >= 0x81 && lead <= 0x9F) || (lead >= 0xE0 && lead <= 0xFC))) {
        const unsigned char trail = bytes[1];

        if (trail >= 0x40 && trail <= 0xFC) {
            *width = 2;
            return (yp_shift_jis_codepoint_t) (lead << 8 | trail);
        }
    }

    *width = 0;
    return 0;
}
28
+
29
// Return the byte width of the Shift_JIS character at c (1 or 2), or 0 if
// the bytes do not form a character within the n bytes available.
static size_t
yp_encoding_shift_jis_char_width(const char *c, ptrdiff_t n) {
    size_t byte_count;

    // Only the width is needed; the decoded value is discarded.
    (void) yp_shift_jis_codepoint(c, n, &byte_count);
    return byte_count;
}
36
+
37
+ static size_t
38
+ yp_encoding_shift_jis_alpha_char(const char *c, ptrdiff_t n) {
39
+ size_t width;
40
+ yp_shift_jis_codepoint_t codepoint = yp_shift_jis_codepoint(c, n, &width);
41
+
42
+ if (width == 1) {
43
+ const char value = (const char) codepoint;
44
+ return yp_encoding_ascii_alpha_char(&value, n);
45
+ } else {
46
+ return 0;
47
+ }
48
+ }
49
+
50
+ static size_t
51
+ yp_encoding_shift_jis_alnum_char(const char *c, ptrdiff_t n) {
52
+ size_t width;
53
+ yp_shift_jis_codepoint_t codepoint = yp_shift_jis_codepoint(c, n, &width);
54
+
55
+ if (width == 1) {
56
+ const char value = (const char) codepoint;
57
+ return yp_encoding_ascii_alnum_char(&value, n);
58
+ } else {
59
+ return 0;
60
+ }
61
+ }
62
+
63
+ static bool
64
+ yp_encoding_shift_jis_isupper_char(const char *c, ptrdiff_t n) {
65
+ size_t width;
66
+ yp_shift_jis_codepoint_t codepoint = yp_shift_jis_codepoint(c, n, &width);
67
+
68
+ if (width == 1) {
69
+ const char value = (const char) codepoint;
70
+ return yp_encoding_ascii_isupper_char(&value, n);
71
+ } else {
72
+ return 0;
73
+ }
74
+ }
75
+
76
+ yp_encoding_t yp_encoding_shift_jis = {
77
+ .name = "shift_jis",
78
+ .char_width = yp_encoding_shift_jis_char_width,
79
+ .alnum_char = yp_encoding_shift_jis_alnum_char,
80
+ .alpha_char = yp_encoding_shift_jis_alpha_char,
81
+ .isupper_char = yp_encoding_shift_jis_isupper_char,
82
+ .multibyte = true
83
+ };