RubyGems - commonmarker - Versions diffs - 0.16.7 → 0.16.8 - Mend

commonmarker 0.16.7 → 0.16.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of commonmarker might be problematic. Click here for more details.

Files changed (8)

checksums.yaml +4 -4
data/ext/commonmarker/cmark/src/references.c +59 -49
data/ext/commonmarker/cmark/src/references.h +4 -4
data/ext/commonmarker/cmark/test/cmark.py +1 -1
data/ext/commonmarker/cmark/test/pathological_tests.py +62 -24
data/ext/commonmarker/cmark/test/spec.txt +27 -34
data/lib/commonmarker/version.rb +1 -1
metadata +2 -2

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: b76f6ad30dbe9d4c8109f74a6f97c28bfcfa298e
-  data.tar.gz: 361cc09938bbbeeb878800e41a3a8ef1d1d1b7e4
+  metadata.gz: c967d121b146595ca1a31130b381e8f323879cf3
+  data.tar.gz: 4fd79c41fd8520743ccbba1039681a2dc41c6762
 SHA512:
-  metadata.gz: 3bdad006fe65b5c86983c015f6da516376ea8d897bc14ee01e8e583e6b54a7746db4027bd1d9c18d5a4dd83d12fdbd617bfecd496a01b752222c370737a690f1
-  data.tar.gz: b778240a0b734480af1b415f9bca900ecb05d0d7bdce0b2d662311fb3d542453efbc3e8dae08342b10599844055b6e7897c498ac30c7b09ed947935b3e8f7c79
+  metadata.gz: be9d93dc746d42505c8d10007c379d252d04c72efbfe2236dcac606d6df13d7ec4465b251129cecaaa19e3827ef32a6fea445fcdcc0c8836caa1488d7186b134
+  data.tar.gz: a1086e64de21020597e4b66cc941295a7be660fa5aafed3a50be342d93acae175fd4f9b309264db010dd6ec71d1655cc0cc57e129449d3dd9e56c191fb8ada4a

data/ext/commonmarker/cmark/src/references.c CHANGED

@@ -5,15 +5,6 @@
 #include "inlines.h"
 #include "chunk.h"
-static unsigned int refhash(const unsigned char *link_ref) {
-  unsigned int hash = 0;
-  while (*link_ref)
-    hash = (*link_ref++) + (hash << 6) + (hash << 16) - hash;
-  return hash;
-}
 static void reference_free(cmark_reference_map *map, cmark_reference *ref) {
   cmark_mem *mem = map->mem;
   if (ref != NULL) {
@@ -53,21 +44,6 @@ static unsigned char *normalize_reference(cmark_mem *mem, cmark_chunk *ref) {
   return result;
 }
-static void add_reference(cmark_reference_map *map, cmark_reference *ref) {
-  cmark_reference *t = ref->next = map->table[ref->hash % REFMAP_SIZE];
-  while (t) {
-    if (t->hash == ref->hash && !strcmp((char *)t->label, (char *)ref->label)) {
-      reference_free(map, ref);
-      return;
-    }
-    t = t->next;
-  }
-  map->table[ref->hash % REFMAP_SIZE] = ref;
-}
 void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label,
                             cmark_chunk *url, cmark_chunk *title) {
   cmark_reference *ref;
@@ -77,64 +53,98 @@ void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label,
   if (reflabel == NULL)
     return;
+  assert(map->sorted == NULL);
   ref = (cmark_reference *)map->mem->calloc(1, sizeof(*ref));
   ref->label = reflabel;
-  ref->hash = refhash(ref->label);
   ref->url = cmark_clean_url(map->mem, url);
   ref->title = cmark_clean_title(map->mem, title);
-  ref->next = NULL;
+  ref->age = map->size;
+  ref->next = map->refs;
+  map->refs = ref;
+  map->size++;
+}
+static int
+labelcmp(const unsigned char *a, const unsigned char *b) {
+  return strcmp((const char *)a, (const char *)b);
+}
+static int
+refcmp(const void *p1, const void *p2) {
+  cmark_reference *r1 = *(cmark_reference **)p1;
+  cmark_reference *r2 = *(cmark_reference **)p2;
+  int res = labelcmp(r1->label, r2->label);
+  return res ? res : ((int)r1->age - (int)r2->age);
+}
+static int
+refsearch(const void *label, const void *p2) {
+  cmark_reference *ref = *(cmark_reference **)p2;
+  return labelcmp((const unsigned char *)label, ref->label);
+}
+static void sort_references(cmark_reference_map *map) {
+  unsigned int i = 0, last = 0, size = map->size;
+  cmark_reference *r = map->refs, **sorted = NULL;
+  sorted = (cmark_reference **)map->mem->calloc(size, sizeof(cmark_reference *));
+  while (r) {
+    sorted[i++] = r;
+    r = r->next;
+  }
+  qsort(sorted, size, sizeof(cmark_reference *), refcmp);
+  for (i = 1; i < size; i++) {
+    if (labelcmp(sorted[i]->label, sorted[last]->label) != 0)
+      sorted[++last] = sorted[i];
+  }
-  add_reference(map, ref);
+  map->sorted = sorted;
+  map->size = last + 1;
 }
 // Returns reference if refmap contains a reference with matching
 // label, otherwise NULL.
 cmark_reference *cmark_reference_lookup(cmark_reference_map *map,
                                         cmark_chunk *label) {
-  cmark_reference *ref = NULL;
+  cmark_reference **ref = NULL;
   unsigned char *norm;
-  unsigned int hash;
   if (label->len < 1 || label->len > MAX_LINK_LABEL_LENGTH)
     return NULL;
-  if (map == NULL)
+  if (map == NULL || !map->size)
     return NULL;
   norm = normalize_reference(map->mem, label);
   if (norm == NULL)
     return NULL;
-  hash = refhash(norm);
-  ref = map->table[hash % REFMAP_SIZE];
-  while (ref) {
-    if (ref->hash == hash && !strcmp((char *)ref->label, (char *)norm))
-      break;
-    ref = ref->next;
-  }
+  if (!map->sorted)
+    sort_references(map);
+  ref = (cmark_reference **)bsearch(norm, map->sorted, map->size, sizeof(cmark_reference *), refsearch);
   map->mem->free(norm);
-  return ref;
+  return ref ? ref[0] : NULL;
 }
 void cmark_reference_map_free(cmark_reference_map *map) {
-  unsigned int i;
+  cmark_reference *ref;
   if (map == NULL)
     return;
-  for (i = 0; i < REFMAP_SIZE; ++i) {
-    cmark_reference *ref = map->table[i];
-    cmark_reference *next;
-    while (ref) {
-      next = ref->next;
-      reference_free(map, ref);
-      ref = next;
-    }
+  ref = map->refs;
+  while (ref) {
+    cmark_reference *next = ref->next;
+    reference_free(map, ref);
+    ref = next;
   }
+  map->mem->free(map->sorted);
   map->mem->free(map);
 }

data/ext/commonmarker/cmark/src/references.h CHANGED

@@ -8,21 +8,21 @@
 extern "C" {
 #endif
-#define REFMAP_SIZE 16
 struct cmark_reference {
   struct cmark_reference *next;
   unsigned char *label;
   cmark_chunk url;
   cmark_chunk title;
-  unsigned int hash;
+  unsigned int age;
 };
 typedef struct cmark_reference cmark_reference;
 struct cmark_reference_map {
   cmark_mem *mem;
-  cmark_reference *table[REFMAP_SIZE];
+  cmark_reference *refs;
+  cmark_reference **sorted;
+  unsigned int size;
 };
 typedef struct cmark_reference_map cmark_reference_map;

data/ext/commonmarker/cmark/test/cmark.py CHANGED

@@ -92,7 +92,7 @@ class CMark:
             else:
                 libnames = [ ["lib", ".so"] ]
             if not library_dir:
-                library_dir = os.path.join("build", "src")
+                library_dir = os.path.join("..", "build", "src")
             for prefix, suffix in libnames:
                 candidate = os.path.join(library_dir, prefix + "cmark-gfm" + suffix)
                 if os.path.isfile(candidate):

data/ext/commonmarker/cmark/test/pathological_tests.py CHANGED

@@ -5,17 +5,31 @@ import re
 import argparse
 import sys
 import platform
+import itertools
+import multiprocessing
 from cmark import CMark
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description='Run cmark tests.')
-    parser.add_argument('--program', dest='program', nargs='?', default=None,
-            help='program to test')
-    parser.add_argument('--library-dir', dest='library_dir', nargs='?',
-            default=None, help='directory containing dynamic library')
-    args = parser.parse_args(sys.argv[1:])
+def hash_collisions():
+    REFMAP_SIZE = 16
+    COUNT = 50000
+    def badhash(ref):
+        h = 0
+        for c in ref:
+            a = (h << 6) & 0xFFFFFFFF
+            b = (h << 16) & 0xFFFFFFFF
+            h = ord(c) + a + b - h
+            h = h & 0xFFFFFFFF
+        return (h % REFMAP_SIZE) == 0
+    keys = ("x%d" % i for i in itertools.count())
+    collisions = itertools.islice((k for k in keys if badhash(k)), COUNT)
+    bad_key = next(collisions)
+    document = ''.join("[%s]: /url\n\n[%s]\n\n" % (key, bad_key) for key in collisions)
-cmark = CMark(prog=args.program, library_dir=args.library_dir)
+    return document, re.compile("(<p>\[%s\]</p>\n){%d}" % (bad_key, COUNT-1))
 # list of pairs consisting of input and a regex that must match the output.
 pathological = {
@@ -58,32 +72,56 @@ pathological = {
                   re.compile("abc\ufffd?de\ufffd?")),
     "backticks":
                  ("".join(map(lambda x: ("e" + "`" * x), range(1,10000))),
-                  re.compile("^<p>[e`]*</p>\n$"))
+                  re.compile("^<p>[e`]*</p>\n$")),
+    "reference collisions": hash_collisions()
     }
 whitespace_re = re.compile('/s+/')
 passed = 0
 errored = 0
-failed = 0
+TIMEOUT = 5
+def run_test(inp, regex):
+    parser = argparse.ArgumentParser(description='Run cmark tests.')
+    parser.add_argument('--program', dest='program', nargs='?', default=None,
+            help='program to test')
+    parser.add_argument('--library-dir', dest='library_dir', nargs='?',
+            default=None, help='directory containing dynamic library')
+    args = parser.parse_args(sys.argv[1:])
+    cmark = CMark(prog=args.program, library_dir=args.library_dir)
-print("Testing pathological cases:")
-for description in pathological:
-    (inp, regex) = pathological[description]
     [rc, actual, err] = cmark.to_html(inp)
     if rc != 0:
-        errored += 1
-        print(description, '[ERRORED (return code %d)]' %rc)
+        print('[ERRORED (return code %d)]' % rc)
         print(err)
+        exit(1)
     elif regex.search(actual):
-        print(description, '[PASSED]')
-        passed += 1
+        print('[PASSED]')
     else:
-        print(description, '[FAILED]')
+        print('[FAILED (mismatch)]')
         print(repr(actual))
-        failed += 1
+        exit(1)
+if __name__ == '__main__':
+    print("Testing pathological cases:")
+    for description in pathological:
+        (inp, regex) = pathological[description]
+        print(description, "... ", end='')
+        sys.stdout.flush()
+        p = multiprocessing.Process(target=run_test, args=(inp, regex))
+        p.start()
+        p.join(TIMEOUT)
+        if p.is_alive():
+            p.terminate()
+            p.join()
+            print('[TIMED OUT]')
+            errored += 1
+        elif p.exitcode != 0:
+            errored += 1
+        else:
+            passed += 1
-print("%d passed, %d failed, %d errored" % (passed, failed, errored))
-if (failed == 0 and errored == 0):
-    exit(0)
-else:
-    exit(1)
+    print("%d passed, %d errored" % (passed, errored))
+    exit(errored)

data/ext/commonmarker/cmark/test/spec.txt CHANGED

@@ -20,15 +20,17 @@ GFM is a strict superset of CommonMark. All the features which are supported in
 GitHub user content and that are not specified on the original CommonMark Spec
 are hence known as **extensions**, and highlighted as such.
+While GFM supports a wide range of inputs, it's worth noting that GitHub.com
+and GitHub Enterprise perform additional post-processing and sanitization after
+GFM is converted to HTML to ensure security and consistency of the website.
 ## What is Markdown?
 Markdown is a plain text format for writing structured documents,
-based on conventions for indicating formatting in email
-and usenet posts.  It was developed by John Gruber (with
-help from Aaron Swartz) and released in 2004 in the form of a
-[syntax description](http://daringfireball.net/projects/markdown/syntax)
-and a Perl script (`Markdown.pl`) for converting Markdown to
-HTML.  In the next decade, dozens of implementations were
+based on conventions used for indicating formatting in email and
+usenet posts.  It was developed in 2004 by John Gruber, who wrote
+the first Markdown-to-HTML converter in Perl, and it soon became
+ubiquitous.  In the next decade, dozens of implementations were
 developed in many languages.  Some extended the original
 Markdown syntax with conventions for footnotes, tables, and
 other document elements.  Some allowed Markdown documents to be
@@ -326,7 +328,7 @@ form feed (`U+000C`), or carriage return (`U+000D`).
 characters].
 A [Unicode whitespace character](@) is
-any code point in the Unicode `Zs` general category, or a tab (`U+0009`),
+any code point in the Unicode `Zs` class, or a tab (`U+0009`),
 carriage return (`U+000D`), newline (`U+000A`), or form feed
 (`U+000C`).
@@ -345,7 +347,7 @@ is `!`, `"`, `#`, `$`, `%`, `&`, `'`, `(`, `)`,
 A [punctuation character](@) is an [ASCII
 punctuation character] or anything in
-the general Unicode categories  `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`.
+the Unicode classes `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`.
 ## Tabs
@@ -416,7 +418,7 @@ as indentation with four spaces would:
 Normally the `>` that begins a block quote may be followed
 optionally by a space, which is not considered part of the
 content.  In the following case `>` is followed by a tab,
-which is treated as if it were expanded into three spaces.
+which is treated as if it were expanded into spaces.
 Since one of these spaces is considered part of the
 delimiter, `foo` is considered to be indented six spaces
 inside the block quote context, so we get an indented
@@ -495,7 +497,7 @@ We can think of a document as a sequence of
 quotations, lists, headings, rules, and code blocks.  Some blocks (like
 block quotes and list items) contain other blocks; others (like
 headings and paragraphs) contain [inline](@) content---text,
-links, emphasized text, images, code spans, and so on.
+links, emphasized text, images, code, and so on.
 ## Precedence
@@ -6047,15 +6049,6 @@ we just have literal backticks:
 <p>`foo</p>
 ````````````````````````````````
-The following case also illustrates the need for opening and
-closing backtick strings to be equal in length:
-```````````````````````````````` example
-`foo``bar``
-.
-<p>`foo<code>bar</code></p>
-````````````````````````````````
 ## Emphasis and strong emphasis
@@ -6110,14 +6103,14 @@ characters that is not preceded or followed by a `_` character.
 A [left-flanking delimiter run](@) is
 a [delimiter run] that is (a) not followed by [Unicode whitespace],
-and (b) not followed by a [punctuation character], or
+and (b) either not followed by a [punctuation character], or
 preceded by [Unicode whitespace] or a [punctuation character].
 For purposes of this definition, the beginning and the end of
 the line count as Unicode whitespace.
 A [right-flanking delimiter run](@) is
 a [delimiter run] that is (a) not preceded by [Unicode whitespace],
-and (b) not preceded by a [punctuation character], or
+and (b) either not preceded by a [punctuation character], or
 followed by [Unicode whitespace] or a [punctuation character].
 For purposes of this definition, the beginning and the end of
 the line count as Unicode whitespace.
@@ -6196,7 +6189,7 @@ The following rules define emphasis and strong emphasis:
 7.  A double `**` [can close strong emphasis](@)
     iff it is part of a [right-flanking delimiter run].
-8.  A double `__` [can close strong emphasis] iff
+8.  A double `__` [can close strong emphasis]
     it is part of a [right-flanking delimiter run]
     and either (a) not part of a [left-flanking delimiter run]
     or (b) part of a [left-flanking delimiter run]
@@ -6237,7 +6230,7 @@ the following principles resolve ambiguity:
     `<em><em>...</em></em>`.
 14. An interpretation `<em><strong>...</strong></em>` is always
-    preferred to `<strong><em>...</em></strong>`.
+    preferred to `<strong><em>..</em></strong>`.
 15. When two potential emphasis or strong emphasis spans overlap,
     so that the second begins before the first ends and ends after
@@ -8616,11 +8609,11 @@ The link labels are case-insensitive:
 ````````````````````````````````
-If you just want a literal `!` followed by bracketed text, you can
-backslash-escape the opening `[`:
+If you just want bracketed text, you can backslash-escape the
+opening `!` and `[`:
 ```````````````````````````````` example
-!\[foo]
+\!\[foo]
 [foo]: /url "title"
 .
@@ -8835,14 +8828,15 @@ greater number of conditions.
 [Autolink]s can also be constructed without requiring the use of `<` and to `>`
 to delimit them, although they will be recognized under a smaller set of
-circumstances.  All such recognized autolinks can only come after whitespace,
-or any of the delimiting characters `*`, `_`, `~`, `(`, and `[`.
+circumstances.  All such recognized autolinks can only come at the beginning of
+a line, after whitespace, or any of the delimiting characters `*`, `_`, `~`,
+and `(`.
-An [extended www autolink](@) will be recognized when a [valid domain] is
-found. A [valid domain](@) consists of the text `www.`, followed by
-alphanumeric characters, underscores (`_`), hyphens (`-`) and periods (`.`).
-There must be at least one period, and no underscores may be present in the
-last two segments of the domain.
+An [extended www autolink](@) will be recognized when the text `www.` is found
+followed by a [valid domain]. A [valid domain](@) consists of alphanumeric
+characters, underscores (`_`), hyphens (`-`) and periods (`.`).  There must be
+at least one period, and no underscores may be present in the last two segments
+of the domain.
 The scheme `http` will be inserted automatically:
@@ -9846,4 +9840,3 @@ closers:
 After we're done, we remove all delimiters above `stack_bottom` from the
 delimiter stack.

data/lib/commonmarker/version.rb CHANGED

@@ -1,3 +1,3 @@
 module CommonMarker
-  VERSION = '0.16.7'.freeze
+  VERSION = '0.16.8'.freeze
 end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: commonmarker
 version: !ruby/object:Gem::Version
-  version: 0.16.7
+  version: 0.16.8
 platform: ruby
 authors:
 - Garen Torikian
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-07-12 00:00:00.000000000 Z
+date: 2017-07-17 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: ruby-enum