commonmarker 0.16.7 → 0.16.8

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of commonmarker might be problematic. Click here for more details.

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b76f6ad30dbe9d4c8109f74a6f97c28bfcfa298e
4
- data.tar.gz: 361cc09938bbbeeb878800e41a3a8ef1d1d1b7e4
3
+ metadata.gz: c967d121b146595ca1a31130b381e8f323879cf3
4
+ data.tar.gz: 4fd79c41fd8520743ccbba1039681a2dc41c6762
5
5
  SHA512:
6
- metadata.gz: 3bdad006fe65b5c86983c015f6da516376ea8d897bc14ee01e8e583e6b54a7746db4027bd1d9c18d5a4dd83d12fdbd617bfecd496a01b752222c370737a690f1
7
- data.tar.gz: b778240a0b734480af1b415f9bca900ecb05d0d7bdce0b2d662311fb3d542453efbc3e8dae08342b10599844055b6e7897c498ac30c7b09ed947935b3e8f7c79
6
+ metadata.gz: be9d93dc746d42505c8d10007c379d252d04c72efbfe2236dcac606d6df13d7ec4465b251129cecaaa19e3827ef32a6fea445fcdcc0c8836caa1488d7186b134
7
+ data.tar.gz: a1086e64de21020597e4b66cc941295a7be660fa5aafed3a50be342d93acae175fd4f9b309264db010dd6ec71d1655cc0cc57e129449d3dd9e56c191fb8ada4a
@@ -5,15 +5,6 @@
5
5
  #include "inlines.h"
6
6
  #include "chunk.h"
7
7
 
8
- static unsigned int refhash(const unsigned char *link_ref) {
9
- unsigned int hash = 0;
10
-
11
- while (*link_ref)
12
- hash = (*link_ref++) + (hash << 6) + (hash << 16) - hash;
13
-
14
- return hash;
15
- }
16
-
17
8
  static void reference_free(cmark_reference_map *map, cmark_reference *ref) {
18
9
  cmark_mem *mem = map->mem;
19
10
  if (ref != NULL) {
@@ -53,21 +44,6 @@ static unsigned char *normalize_reference(cmark_mem *mem, cmark_chunk *ref) {
53
44
  return result;
54
45
  }
55
46
 
56
- static void add_reference(cmark_reference_map *map, cmark_reference *ref) {
57
- cmark_reference *t = ref->next = map->table[ref->hash % REFMAP_SIZE];
58
-
59
- while (t) {
60
- if (t->hash == ref->hash && !strcmp((char *)t->label, (char *)ref->label)) {
61
- reference_free(map, ref);
62
- return;
63
- }
64
-
65
- t = t->next;
66
- }
67
-
68
- map->table[ref->hash % REFMAP_SIZE] = ref;
69
- }
70
-
71
47
  void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label,
72
48
  cmark_chunk *url, cmark_chunk *title) {
73
49
  cmark_reference *ref;
@@ -77,64 +53,98 @@ void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label,
77
53
  if (reflabel == NULL)
78
54
  return;
79
55
 
56
+ assert(map->sorted == NULL);
57
+
80
58
  ref = (cmark_reference *)map->mem->calloc(1, sizeof(*ref));
81
59
  ref->label = reflabel;
82
- ref->hash = refhash(ref->label);
83
60
  ref->url = cmark_clean_url(map->mem, url);
84
61
  ref->title = cmark_clean_title(map->mem, title);
85
- ref->next = NULL;
62
+ ref->age = map->size;
63
+ ref->next = map->refs;
64
+
65
+ map->refs = ref;
66
+ map->size++;
67
+ }
68
+
69
+ static int
70
+ labelcmp(const unsigned char *a, const unsigned char *b) {
71
+ return strcmp((const char *)a, (const char *)b);
72
+ }
73
+
74
+ static int
75
+ refcmp(const void *p1, const void *p2) {
76
+ cmark_reference *r1 = *(cmark_reference **)p1;
77
+ cmark_reference *r2 = *(cmark_reference **)p2;
78
+ int res = labelcmp(r1->label, r2->label);
79
+ return res ? res : ((int)r1->age - (int)r2->age);
80
+ }
81
+
82
+ static int
83
+ refsearch(const void *label, const void *p2) {
84
+ cmark_reference *ref = *(cmark_reference **)p2;
85
+ return labelcmp((const unsigned char *)label, ref->label);
86
+ }
87
+
88
+ static void sort_references(cmark_reference_map *map) {
89
+ unsigned int i = 0, last = 0, size = map->size;
90
+ cmark_reference *r = map->refs, **sorted = NULL;
91
+
92
+ sorted = (cmark_reference **)map->mem->calloc(size, sizeof(cmark_reference *));
93
+ while (r) {
94
+ sorted[i++] = r;
95
+ r = r->next;
96
+ }
97
+
98
+ qsort(sorted, size, sizeof(cmark_reference *), refcmp);
99
+
100
+ for (i = 1; i < size; i++) {
101
+ if (labelcmp(sorted[i]->label, sorted[last]->label) != 0)
102
+ sorted[++last] = sorted[i];
103
+ }
86
104
 
87
- add_reference(map, ref);
105
+ map->sorted = sorted;
106
+ map->size = last + 1;
88
107
  }
89
108
 
90
109
  // Returns reference if refmap contains a reference with matching
91
110
  // label, otherwise NULL.
92
111
  cmark_reference *cmark_reference_lookup(cmark_reference_map *map,
93
112
  cmark_chunk *label) {
94
- cmark_reference *ref = NULL;
113
+ cmark_reference **ref = NULL;
95
114
  unsigned char *norm;
96
- unsigned int hash;
97
115
 
98
116
  if (label->len < 1 || label->len > MAX_LINK_LABEL_LENGTH)
99
117
  return NULL;
100
118
 
101
- if (map == NULL)
119
+ if (map == NULL || !map->size)
102
120
  return NULL;
103
121
 
104
122
  norm = normalize_reference(map->mem, label);
105
123
  if (norm == NULL)
106
124
  return NULL;
107
125
 
108
- hash = refhash(norm);
109
- ref = map->table[hash % REFMAP_SIZE];
110
-
111
- while (ref) {
112
- if (ref->hash == hash && !strcmp((char *)ref->label, (char *)norm))
113
- break;
114
- ref = ref->next;
115
- }
126
+ if (!map->sorted)
127
+ sort_references(map);
116
128
 
129
+ ref = (cmark_reference **)bsearch(norm, map->sorted, map->size, sizeof(cmark_reference *), refsearch);
117
130
  map->mem->free(norm);
118
- return ref;
131
+ return ref ? ref[0] : NULL;
119
132
  }
120
133
 
121
134
  void cmark_reference_map_free(cmark_reference_map *map) {
122
- unsigned int i;
135
+ cmark_reference *ref;
123
136
 
124
137
  if (map == NULL)
125
138
  return;
126
139
 
127
- for (i = 0; i < REFMAP_SIZE; ++i) {
128
- cmark_reference *ref = map->table[i];
129
- cmark_reference *next;
130
-
131
- while (ref) {
132
- next = ref->next;
133
- reference_free(map, ref);
134
- ref = next;
135
- }
140
+ ref = map->refs;
141
+ while (ref) {
142
+ cmark_reference *next = ref->next;
143
+ reference_free(map, ref);
144
+ ref = next;
136
145
  }
137
146
 
147
+ map->mem->free(map->sorted);
138
148
  map->mem->free(map);
139
149
  }
140
150
 
@@ -8,21 +8,21 @@
8
8
  extern "C" {
9
9
  #endif
10
10
 
11
- #define REFMAP_SIZE 16
12
-
13
11
  struct cmark_reference {
14
12
  struct cmark_reference *next;
15
13
  unsigned char *label;
16
14
  cmark_chunk url;
17
15
  cmark_chunk title;
18
- unsigned int hash;
16
+ unsigned int age;
19
17
  };
20
18
 
21
19
  typedef struct cmark_reference cmark_reference;
22
20
 
23
21
  struct cmark_reference_map {
24
22
  cmark_mem *mem;
25
- cmark_reference *table[REFMAP_SIZE];
23
+ cmark_reference *refs;
24
+ cmark_reference **sorted;
25
+ unsigned int size;
26
26
  };
27
27
 
28
28
  typedef struct cmark_reference_map cmark_reference_map;
@@ -92,7 +92,7 @@ class CMark:
92
92
  else:
93
93
  libnames = [ ["lib", ".so"] ]
94
94
  if not library_dir:
95
- library_dir = os.path.join("build", "src")
95
+ library_dir = os.path.join("..", "build", "src")
96
96
  for prefix, suffix in libnames:
97
97
  candidate = os.path.join(library_dir, prefix + "cmark-gfm" + suffix)
98
98
  if os.path.isfile(candidate):
@@ -5,17 +5,31 @@ import re
5
5
  import argparse
6
6
  import sys
7
7
  import platform
8
+ import itertools
9
+ import multiprocessing
8
10
  from cmark import CMark
9
11
 
10
- if __name__ == "__main__":
11
- parser = argparse.ArgumentParser(description='Run cmark tests.')
12
- parser.add_argument('--program', dest='program', nargs='?', default=None,
13
- help='program to test')
14
- parser.add_argument('--library-dir', dest='library_dir', nargs='?',
15
- default=None, help='directory containing dynamic library')
16
- args = parser.parse_args(sys.argv[1:])
12
+ def hash_collisions():
13
+ REFMAP_SIZE = 16
14
+ COUNT = 50000
15
+
16
+ def badhash(ref):
17
+ h = 0
18
+ for c in ref:
19
+ a = (h << 6) & 0xFFFFFFFF
20
+ b = (h << 16) & 0xFFFFFFFF
21
+ h = ord(c) + a + b - h
22
+ h = h & 0xFFFFFFFF
23
+
24
+ return (h % REFMAP_SIZE) == 0
25
+
26
+ keys = ("x%d" % i for i in itertools.count())
27
+ collisions = itertools.islice((k for k in keys if badhash(k)), COUNT)
28
+ bad_key = next(collisions)
29
+
30
+ document = ''.join("[%s]: /url\n\n[%s]\n\n" % (key, bad_key) for key in collisions)
17
31
 
18
- cmark = CMark(prog=args.program, library_dir=args.library_dir)
32
+ return document, re.compile("(<p>\[%s\]</p>\n){%d}" % (bad_key, COUNT-1))
19
33
 
20
34
  # dict mapping each test description to a pair: the input and a regex that must match the output.
21
35
  pathological = {
@@ -58,32 +72,56 @@ pathological = {
58
72
  re.compile("abc\ufffd?de\ufffd?")),
59
73
  "backticks":
60
74
  ("".join(map(lambda x: ("e" + "`" * x), range(1,10000))),
61
- re.compile("^<p>[e`]*</p>\n$"))
75
+ re.compile("^<p>[e`]*</p>\n$")),
76
+ "reference collisions": hash_collisions()
62
77
  }
63
78
 
64
79
  whitespace_re = re.compile('/s+/')
65
80
  passed = 0
66
81
  errored = 0
67
- failed = 0
82
+ TIMEOUT = 5
83
+
84
+ def run_test(inp, regex):
85
+ parser = argparse.ArgumentParser(description='Run cmark tests.')
86
+ parser.add_argument('--program', dest='program', nargs='?', default=None,
87
+ help='program to test')
88
+ parser.add_argument('--library-dir', dest='library_dir', nargs='?',
89
+ default=None, help='directory containing dynamic library')
90
+ args = parser.parse_args(sys.argv[1:])
91
+ cmark = CMark(prog=args.program, library_dir=args.library_dir)
68
92
 
69
- print("Testing pathological cases:")
70
- for description in pathological:
71
- (inp, regex) = pathological[description]
72
93
  [rc, actual, err] = cmark.to_html(inp)
73
94
  if rc != 0:
74
- errored += 1
75
- print(description, '[ERRORED (return code %d)]' %rc)
95
+ print('[ERRORED (return code %d)]' % rc)
76
96
  print(err)
97
+ exit(1)
77
98
  elif regex.search(actual):
78
- print(description, '[PASSED]')
79
- passed += 1
99
+ print('[PASSED]')
80
100
  else:
81
- print(description, '[FAILED]')
101
+ print('[FAILED (mismatch)]')
82
102
  print(repr(actual))
83
- failed += 1
103
+ exit(1)
104
+
105
+ if __name__ == '__main__':
106
+ print("Testing pathological cases:")
107
+ for description in pathological:
108
+ (inp, regex) = pathological[description]
109
+ print(description, "... ", end='')
110
+ sys.stdout.flush()
111
+
112
+ p = multiprocessing.Process(target=run_test, args=(inp, regex))
113
+ p.start()
114
+ p.join(TIMEOUT)
115
+
116
+ if p.is_alive():
117
+ p.terminate()
118
+ p.join()
119
+ print('[TIMED OUT]')
120
+ errored += 1
121
+ elif p.exitcode != 0:
122
+ errored += 1
123
+ else:
124
+ passed += 1
84
125
 
85
- print("%d passed, %d failed, %d errored" % (passed, failed, errored))
86
- if (failed == 0 and errored == 0):
87
- exit(0)
88
- else:
89
- exit(1)
126
+ print("%d passed, %d errored" % (passed, errored))
127
+ exit(errored)
@@ -20,15 +20,17 @@ GFM is a strict superset of CommonMark. All the features which are supported in
20
20
  GitHub user content and that are not specified on the original CommonMark Spec
21
21
  are hence known as **extensions**, and highlighted as such.
22
22
 
23
+ While GFM supports a wide range of inputs, it's worth noting that GitHub.com
24
+ and GitHub Enterprise perform additional post-processing and sanitization after
25
+ GFM is converted to HTML to ensure security and consistency of the website.
26
+
23
27
  ## What is Markdown?
24
28
 
25
29
  Markdown is a plain text format for writing structured documents,
26
- based on conventions for indicating formatting in email
27
- and usenet posts. It was developed by John Gruber (with
28
- help from Aaron Swartz) and released in 2004 in the form of a
29
- [syntax description](http://daringfireball.net/projects/markdown/syntax)
30
- and a Perl script (`Markdown.pl`) for converting Markdown to
31
- HTML. In the next decade, dozens of implementations were
30
+ based on conventions used for indicating formatting in email and
31
+ usenet posts. It was developed in 2004 by John Gruber, who wrote
32
+ the first Markdown-to-HTML converter in Perl, and it soon became
33
+ ubiquitous. In the next decade, dozens of implementations were
32
34
  developed in many languages. Some extended the original
33
35
  Markdown syntax with conventions for footnotes, tables, and
34
36
  other document elements. Some allowed Markdown documents to be
@@ -326,7 +328,7 @@ form feed (`U+000C`), or carriage return (`U+000D`).
326
328
  characters].
327
329
 
328
330
  A [Unicode whitespace character](@) is
329
- any code point in the Unicode `Zs` general category, or a tab (`U+0009`),
331
+ any code point in the Unicode `Zs` class, or a tab (`U+0009`),
330
332
  carriage return (`U+000D`), newline (`U+000A`), or form feed
331
333
  (`U+000C`).
332
334
 
@@ -345,7 +347,7 @@ is `!`, `"`, `#`, `$`, `%`, `&`, `'`, `(`, `)`,
345
347
 
346
348
  A [punctuation character](@) is an [ASCII
347
349
  punctuation character] or anything in
348
- the general Unicode categories `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`.
350
+ the Unicode classes `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`.
349
351
 
350
352
  ## Tabs
351
353
 
@@ -416,7 +418,7 @@ as indentation with four spaces would:
416
418
  Normally the `>` that begins a block quote may be followed
417
419
  optionally by a space, which is not considered part of the
418
420
  content. In the following case `>` is followed by a tab,
419
- which is treated as if it were expanded into three spaces.
421
+ which is treated as if it were expanded into spaces.
420
422
  Since one of these spaces is considered part of the
421
423
  delimiter, `foo` is considered to be indented six spaces
422
424
  inside the block quote context, so we get an indented
@@ -495,7 +497,7 @@ We can think of a document as a sequence of
495
497
  quotations, lists, headings, rules, and code blocks. Some blocks (like
496
498
  block quotes and list items) contain other blocks; others (like
497
499
  headings and paragraphs) contain [inline](@) content---text,
498
- links, emphasized text, images, code spans, and so on.
500
+ links, emphasized text, images, code, and so on.
499
501
 
500
502
  ## Precedence
501
503
 
@@ -6047,15 +6049,6 @@ we just have literal backticks:
6047
6049
  <p>`foo</p>
6048
6050
  ````````````````````````````````
6049
6051
 
6050
- The following case also illustrates the need for opening and
6051
- closing backtick strings to be equal in length:
6052
-
6053
- ```````````````````````````````` example
6054
- `foo``bar``
6055
- .
6056
- <p>`foo<code>bar</code></p>
6057
- ````````````````````````````````
6058
-
6059
6052
 
6060
6053
  ## Emphasis and strong emphasis
6061
6054
 
@@ -6110,14 +6103,14 @@ characters that is not preceded or followed by a `_` character.
6110
6103
 
6111
6104
  A [left-flanking delimiter run](@) is
6112
6105
  a [delimiter run] that is (a) not followed by [Unicode whitespace],
6113
- and (b) not followed by a [punctuation character], or
6106
+ and (b) either not followed by a [punctuation character], or
6114
6107
  preceded by [Unicode whitespace] or a [punctuation character].
6115
6108
  For purposes of this definition, the beginning and the end of
6116
6109
  the line count as Unicode whitespace.
6117
6110
 
6118
6111
  A [right-flanking delimiter run](@) is
6119
6112
  a [delimiter run] that is (a) not preceded by [Unicode whitespace],
6120
- and (b) not preceded by a [punctuation character], or
6113
+ and (b) either not preceded by a [punctuation character], or
6121
6114
  followed by [Unicode whitespace] or a [punctuation character].
6122
6115
  For purposes of this definition, the beginning and the end of
6123
6116
  the line count as Unicode whitespace.
@@ -6196,7 +6189,7 @@ The following rules define emphasis and strong emphasis:
6196
6189
  7. A double `**` [can close strong emphasis](@)
6197
6190
  iff it is part of a [right-flanking delimiter run].
6198
6191
 
6199
- 8. A double `__` [can close strong emphasis] iff
6192
+ 8. A double `__` [can close strong emphasis]
6200
6193
  it is part of a [right-flanking delimiter run]
6201
6194
  and either (a) not part of a [left-flanking delimiter run]
6202
6195
  or (b) part of a [left-flanking delimiter run]
@@ -6237,7 +6230,7 @@ the following principles resolve ambiguity:
6237
6230
  `<em><em>...</em></em>`.
6238
6231
 
6239
6232
  14. An interpretation `<em><strong>...</strong></em>` is always
6240
- preferred to `<strong><em>...</em></strong>`.
6233
+ preferred to `<strong><em>..</em></strong>`.
6241
6234
 
6242
6235
  15. When two potential emphasis or strong emphasis spans overlap,
6243
6236
  so that the second begins before the first ends and ends after
@@ -8616,11 +8609,11 @@ The link labels are case-insensitive:
8616
8609
  ````````````````````````````````
8617
8610
 
8618
8611
 
8619
- If you just want a literal `!` followed by bracketed text, you can
8620
- backslash-escape the opening `[`:
8612
+ If you just want bracketed text, you can backslash-escape the
8613
+ opening `!` and `[`:
8621
8614
 
8622
8615
  ```````````````````````````````` example
8623
- !\[foo]
8616
+ \!\[foo]
8624
8617
 
8625
8618
  [foo]: /url "title"
8626
8619
  .
@@ -8835,14 +8828,15 @@ greater number of conditions.
8835
8828
 
8836
8829
  [Autolink]s can also be constructed without requiring the use of `<` and `>`
8837
8830
  to delimit them, although they will be recognized under a smaller set of
8838
- circumstances. All such recognized autolinks can only come after whitespace,
8839
- or any of the delimiting characters `*`, `_`, `~`, `(`, and `[`.
8831
+ circumstances. All such recognized autolinks can only come at the beginning of
8832
+ a line, after whitespace, or any of the delimiting characters `*`, `_`, `~`,
8833
+ and `(`.
8840
8834
 
8841
- An [extended www autolink](@) will be recognized when a [valid domain] is
8842
- found. A [valid domain](@) consists of the text `www.`, followed by
8843
- alphanumeric characters, underscores (`_`), hyphens (`-`) and periods (`.`).
8844
- There must be at least one period, and no underscores may be present in the
8845
- last two segments of the domain.
8835
+ An [extended www autolink](@) will be recognized when the text `www.` is found
8836
+ followed by a [valid domain]. A [valid domain](@) consists of alphanumeric
8837
+ characters, underscores (`_`), hyphens (`-`) and periods (`.`). There must be
8838
+ at least one period, and no underscores may be present in the last two segments
8839
+ of the domain.
8846
8840
 
8847
8841
  The scheme `http` will be inserted automatically:
8848
8842
 
@@ -9846,4 +9840,3 @@ closers:
9846
9840
 
9847
9841
  After we're done, we remove all delimiters above `stack_bottom` from the
9848
9842
  delimiter stack.
9849
-
@@ -1,3 +1,3 @@
1
1
  module CommonMarker
2
- VERSION = '0.16.7'.freeze
2
+ VERSION = '0.16.8'.freeze
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: commonmarker
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.16.7
4
+ version: 0.16.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Garen Torikian
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2017-07-12 00:00:00.000000000 Z
12
+ date: 2017-07-17 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ruby-enum