RubyGems - fast_underscore - Versions diffs - 0.0.3 → 0.3.2 - Mend

fast_underscore 0.0.3 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

checksums.yaml +5 -5
data/.github/dependabot.yml +6 -0
data/.github/workflows/main.yml +44 -0
data/.gitignore +2 -0
data/.rubocop.yml +24 -5
data/CHANGELOG.md +44 -0
data/CODE_OF_CONDUCT.md +76 -0
data/Gemfile +1 -1
data/Gemfile.lock +58 -31
data/{LICENSE.txt → LICENSE} +1 -1
data/README.md +16 -25
data/Steepfile +6 -0
data/bin/{benchmark → bench} +3 -2
data/bin/console +1 -1
data/ext/fast_underscore/fast_underscore.c +133 -182
data/fast_underscore.gemspec +6 -5
data/gemfiles/5.1.gemfile +7 -0
data/gemfiles/5.2.gemfile +7 -0
data/gemfiles/6.0.gemfile +7 -0
data/gemfiles/6.1.gemfile +7 -0
data/lib/fast_underscore.rb +71 -14
data/lib/fast_underscore/version.rb +1 -1
data/sig/fast_underscore.rbs +10 -0
metadata +40 -20
data/.travis.yml +0 -6
data/bin/rake +0 -29
data/bin/rubocop +0 -29

data/bin/console CHANGED Viewed

@@ -1,7 +1,7 @@
 #!/usr/bin/env ruby
+# frozen_string_literal: true
 require 'bundler/setup'
-require 'active_support'
 require 'fast_underscore'
 require 'irb'

data/ext/fast_underscore/fast_underscore.c CHANGED Viewed

@@ -1,34 +1,22 @@
 #include <ruby.h>
 #include <ruby/encoding.h>
-#include <stdlib.h>
-/**
- * true if the given codepoint is a lowercase ascii character.
- */
-static int
-character_is_lower(unsigned int character) {
+// true if the given codepoint is a lowercase ascii character.
+static int character_is_lower(unsigned int character) {
   return character >= 'a' && character <= 'z';
 }
-/**
- * true if the given codepoint is a uppercase ascii character.
- */
-static int
-character_is_upper(unsigned int character) {
+// true if the given codepoint is an uppercase ascii character.
+static int character_is_upper(unsigned int character) {
   return character >= 'A' && character <= 'Z';
 }
-/**
- * true if the given codepoint is an ascii digit.
- */
-static int
-character_is_digit(unsigned int character) {
+// true if the given codepoint is an ascii digit.
+static int character_is_digit(unsigned int character) {
   return character >= '0' && character <= '9';
 }
-/**
- * Macros for extracting the character out of the `codepoint_t` struct.
- */
+// Macros for extracting the character out of the `codepoint_t` struct.
 #define codepoint_is_lower(codepoint) character_is_lower(codepoint->character)
 #define codepoint_is_upper(codepoint) character_is_upper(codepoint->character)
 #define codepoint_is_digit(codepoint) character_is_digit(codepoint->character)
@@ -62,9 +50,64 @@ typedef struct codepoint {
  * A struct for tracking the built string as it gets converted. Maintains an
  * internal DFA for transitioning through various inputs to match certain
  * patterns that need to be separated with underscores.
+ *
+ * The internal DFA looks like:
+ *
+ *      ┌ - ┐ ┌ * ┐
+ *      │   v │   v
+ *    ┌─────────────┐           ┌─────────────┐
+ *    │             │──── : ───>│             │
+ * ──>│   DEFAULT   │<─── : ────│    COLON    │
+ *    │             │<─── * ────│             │
+ *    └─────────────┘           └─────────────┘
+ *      │   ^  ^  ^
+ *      │   │  │  └───── a-z ─────────────┐
+ *   0-9A-Z *  │                          │
+ *      │   │  └───────── * ────────┐     │
+ *      v   │                       │     │
+ *    ┌─────────────┐           ┌─────────────┐
+ *    │             │─── A-Z ──>│             │
+ *    │ UPPER_START │           │  UPPER_END  │
+ *    │             │<── 0-9 ───│             │
+ *    └─────────────┘           └─────────────┘
+ *       │     ^                    ^     │
+ *       └ 0-9 ┘                    └ A-Z ┘
+ *
+ * Transitions from DEFAULT:
+ * - On "-", push an "_" and stay on DEFAULT
+ * - On ":", go to COLON
+ * - On a digit or upper, start a buffer with the char and go to UPPER_START
+ * - On anything else, push the char and stay on DEFAULT
+ *
+ * Transitions from COLON:
+ * - On ":", push a "/" and go to DEFAULT
+ * - On anything else, push a ":" and the char and go to DEFAULT
+ *
+ * Transitions from UPPER_START:
+ * - On a digit, push the digit onto the buffer and stay on UPPER_START
+ * - On an upper, push the upper onto the buffer and go to UPPER_END
+ * - On anything else, push the buffer, go to DEFAULT, then handle the char
+ *
+ * Transitions from UPPER_END:
+ * - On a digit, push the digit onto the buffer and go to UPPER_START
+ * - On an upper, push the upper onto the buffer and stay on UPPER_END
+ * - On a lower, push the buffer up to the last char, push an "_", then push
+ *   the last char of the buffer, go to DEFAULT, then handle the char
+ * - On anything else, push the buffer, go to DEFAULT, then handle the char
+ *
+ * These transitions allow us to accomplish the equivalent of the following code
+ * with one pass through the string:
+ *
+ * def underscore(word)
+ *   word.gsub!('::', '/')
+ *   word.gsub!(/([A-Z\d]+)([A-Z][a-z])/, '\1_\2')
+ *   word.gsub!(/([a-z\d])([A-Z])/, '\1_\2')
+ *   word.tr!('-', '_')
+ *   word.downcase!
+ * end
  */
 typedef struct builder {
-  // The state of the DFA in which is the builder
+  // The state of the DFA that the builder is in
   enum state {
     STATE_DEFAULT,
     STATE_COLON,
@@ -82,78 +125,15 @@ typedef struct builder {
   // Whether or not the last pushed result character should cause the following
   // one to be spaced by an underscore
-  int pushNext;
+  int push_next;
 } builder_t;
-/**
- * Allocate and initialize a `codepoint_t` struct.
- */
-static codepoint_t*
-codepoint_build(rb_encoding *encoding) {
-  codepoint_t *codepoint;
-  codepoint = (codepoint_t *) malloc(sizeof(codepoint_t));
-  if (codepoint == NULL) {
-    return NULL;
-  }
-  codepoint->encoding = encoding;
-  return codepoint;
-}
-/**
- * Free a previously allocated `codepoint_t` struct.
- */
-static void
-codepoint_free(codepoint_t *codepoint) {
-  free(codepoint);
-}
-/**
- * Allocate and initialize a `builder_t` struct.
- */
-static builder_t*
-builder_build(long str_len) {
-  builder_t *builder;
-  builder = (builder_t *) malloc(sizeof(builder_t));
-  if (builder == NULL) {
-    return NULL;
-  }
-  builder->state = STATE_DEFAULT;
-  builder->segment = (char *) malloc(str_len * sizeof(unsigned int) * 2);
-  if (builder->segment == NULL) {
-    free(builder);
-    return NULL;
-  }
-  builder->result = (char *) malloc(str_len * sizeof(unsigned int) * 2);
-  if (builder->result == NULL) {
-    free(builder->segment);
-    free(builder);
-    return NULL;
-  }
-  builder->segment_size = 0;
-  builder->result_size = 0;
-  builder->pushNext = 0;
-  return builder;
-}
-/**
- * Push a character onto the resultant string using the given codepoint and
- * encoding.
- */
-static void
-builder_result_push_char(builder_t *builder, unsigned int character, int size,
-                         rb_encoding *encoding) {
+// Push a character onto the resultant string using the given codepoint and
+// encoding.
+static void builder_result_push_char(builder_t *builder, unsigned int character, int size, rb_encoding *encoding) {
   if (character_is_upper(character)) {
-    if (builder->pushNext == 1) {
-      builder->pushNext = 0;
+    if (builder->push_next == 1) {
+      builder->push_next = 0;
       builder_result_push_literal(builder, '_');
     }
@@ -161,7 +141,7 @@ builder_result_push_char(builder_t *builder, unsigned int character, int size,
     return;
   }
-  builder->pushNext = (character_is_lower(character) || character_is_digit(character));
+  builder->push_next = (character_is_lower(character) || character_is_digit(character));
   if (encoding == NULL) {
     builder->result[builder->result_size++] = (char) character;
@@ -171,20 +151,14 @@ builder_result_push_char(builder_t *builder, unsigned int character, int size,
   }
 }
-/**
- * Push the given codepoint onto the builder.
- */
-static void
-builder_segment_push(builder_t *builder, codepoint_t *codepoint) {
+// Push the given codepoint onto the builder.
+static void builder_segment_push(builder_t *builder, codepoint_t *codepoint) {
   builder->segment[builder->segment_size++] = (char) codepoint->character;
 }
-/**
- * Copy the given number of characters out of the segment cache onto the result
- * string.
- */
-static void
-builder_segment_copy(builder_t *builder, long size) {
+// Copy the given number of characters out of the segment cache onto the result
+// string.
+static void builder_segment_copy(builder_t *builder, long size) {
   long idx;
   for (idx = 0; idx < size; idx++) {
@@ -192,24 +166,18 @@ builder_segment_copy(builder_t *builder, long size) {
   }
 }
-/**
- * Restart the `builder_t` back at the default state (because we've hit a
- * character for which we have no allowed transitions).
- */
-static void
-builder_restart(builder_t *builder) {
+// Restart the `builder_t` back at the default state (because we've hit a
+// character for which we have no allowed transitions).
+static void builder_restart(builder_t *builder) {
   builder->state = STATE_DEFAULT;
   builder->segment_size = 0;
 }
 static void builder_next(builder_t *builder, codepoint_t *codepoint);
-/**
- * Pull the remaining content out of the cached segment in case we don't end
- * parsing while not in the default state.
- */
-static void
-builder_flush(builder_t *builder) {
+// Pull the remaining content out of the cached segment in case we don't end
+// parsing while not in the default state.
+static void builder_flush(builder_t *builder) {
   switch (builder->state) {
     case STATE_DEFAULT: return;
     case STATE_COLON:
@@ -222,33 +190,30 @@ builder_flush(builder_t *builder) {
   }
 }
-/**
- * Perform transitions from the STATE_DEFAULT state.
- */
-static inline void
-builder_default_transition(builder_t *builder, codepoint_t *codepoint) {
+// Perform transitions from the STATE_DEFAULT state.
+static inline void builder_default_transition(builder_t *builder, codepoint_t *codepoint) {
   if (codepoint->character == '-') {
     builder_result_push_literal(builder, '_');
     return;
   }
   if (codepoint->character == ':') {
     builder->state = STATE_COLON;
     return;
   }
   if (codepoint_is_digit(codepoint) || codepoint_is_upper(codepoint)) {
     builder->segment[0] = (char) codepoint->character;
     builder->segment_size = 1;
     builder->state = STATE_UPPER_START;
     return;
   }
   builder_result_push(builder, codepoint);
 }
-/**
- * Perform transitions from the STATE_COLON state.
- */
-static inline void
-builder_colon_transition(builder_t *builder, codepoint_t *codepoint) {
+// Perform transitions from the STATE_COLON state.
+static inline void builder_colon_transition(builder_t *builder, codepoint_t *codepoint) {
   if (codepoint->character == ':') {
     builder_result_push_literal(builder, '/');
     builder_restart(builder);
@@ -260,15 +225,13 @@ builder_colon_transition(builder_t *builder, codepoint_t *codepoint) {
   builder_next(builder, codepoint);
 }
-/**
- * Perform transitions from the STATE_UPPER_START state.
- */
-static inline void
-builder_upper_start_transition(builder_t *builder, codepoint_t *codepoint) {
+// Perform transitions from the STATE_UPPER_START state.
+static inline void builder_upper_start_transition(builder_t *builder, codepoint_t *codepoint) {
   if (codepoint_is_digit(codepoint)) {
     builder_segment_push(builder, codepoint);
     return;
   }
   if (codepoint_is_upper(codepoint)) {
     builder_segment_push(builder, codepoint);
     builder->state = STATE_UPPER_END;
@@ -280,20 +243,19 @@ builder_upper_start_transition(builder_t *builder, codepoint_t *codepoint) {
   builder_next(builder, codepoint);
 }
-/**
- * Perform transitions from the STATE_UPPER_END state.
- */
-static inline void
-builder_upper_end_transition(builder_t *builder, codepoint_t *codepoint) {
+// Perform transitions from the STATE_UPPER_END state.
+static inline void builder_upper_end_transition(builder_t *builder, codepoint_t *codepoint) {
   if (codepoint_is_digit(codepoint)) {
     builder_segment_push(builder, codepoint);
     builder->state = STATE_UPPER_START;
     return;
   }
   if (codepoint_is_upper(codepoint)) {
     builder_segment_push(builder, codepoint);
     return;
   }
   if (codepoint_is_lower(codepoint)) {
     builder_segment_copy(builder, builder->segment_size - 1);
     builder_result_push_literal(builder, '_');
@@ -308,12 +270,8 @@ builder_upper_end_transition(builder_t *builder, codepoint_t *codepoint) {
   builder_next(builder, codepoint);
 }
-/**
- * Accept the next codepoint, which will move the `builder_t` struct into the
- * next state.
- */
-static void
-builder_next(builder_t *builder, codepoint_t *codepoint) {
+// Accept the next codepoint, which will move the `builder_t` struct into the next state.
+static void builder_next(builder_t *builder, codepoint_t *codepoint) {
   switch (builder->state) {
     case STATE_DEFAULT:
       return builder_default_transition(builder, codepoint);
@@ -326,16 +284,6 @@ builder_next(builder_t *builder, codepoint_t *codepoint) {
   }
 }
-/**
- * Frees a previously allocated `builder_t` struct.
- */
-static void
-builder_free(builder_t *builder) {
-  free(builder->segment);
-  free(builder->result);
-  free(builder);
-}
 /**
  * Makes an underscored, lowercase form from the expression in the string.
  *
@@ -349,43 +297,46 @@ builder_free(builder_t *builder) {
  *
  *     camelize(underscore('SSLError'))  # => "SslError"
  */
-static VALUE
-rb_str_underscore(VALUE self, VALUE rb_string) {
-  VALUE resultant;
-  rb_encoding *encoding;
-  char *string;
-  char *end;
-  builder_t *builder;
-  codepoint_t *codepoint;
-  encoding = rb_enc_from_index(ENCODING_GET(rb_string));
-  string = RSTRING_PTR(rb_string);
-  end = RSTRING_END(rb_string);
-  builder = builder_build(RSTRING_LEN(rb_string) * 2);
-  codepoint = codepoint_build(encoding);
-  while (string < end) {
-    codepoint->character = rb_enc_codepoint_len(string, end, &codepoint->size, encoding);
-    builder_next(builder, codepoint);
-    string += codepoint->size;
+static VALUE underscore(VALUE string) {
+  char segment[RSTRING_LEN(string) * 2 * sizeof(unsigned int) * 2];
+  char result[RSTRING_LEN(string) * 2 * sizeof(unsigned int) * 2];
+  builder_t builder = {
+    .state = STATE_DEFAULT,
+    .segment = segment,
+    .result = result,
+    .segment_size = 0,
+    .result_size = 0,
+    .push_next = 0
+  };
+  codepoint_t codepoint = {
+    .encoding = rb_enc_from_index(ENCODING_GET(string)),
+    .character = 0,
+    .size = 0
+  };
+  char *pointer = RSTRING_PTR(string);
+  char *end = RSTRING_END(string);
+  while (pointer < end) {
+    codepoint.character = rb_enc_codepoint_len(pointer, end, &codepoint.size, codepoint.encoding);
+    builder_next(&builder, &codepoint);
+    pointer += codepoint.size;
   }
-  builder_flush(builder);
-  resultant = rb_enc_str_new(builder->result, builder->result_size, encoding);
-  builder_free(builder);
-  codepoint_free(codepoint);
+  builder_flush(&builder);
+  return rb_enc_str_new(builder.result, builder.result_size, codepoint.encoding);
+}
-  return resultant;
+// FastUnderscore::underscore
+static VALUE fast_underscore(VALUE self, VALUE string) {
+  return underscore(string);
 }
-/**
- * Hook into Ruby and define the `FastUnderscore::underscore`.
- */
-void
-Init_fast_underscore(void) {
+// Hook into Ruby and define FastUnderscore::underscore and String#underscore
+void Init_fast_underscore(void) {
   VALUE rb_cFastUnderscore = rb_define_module("FastUnderscore");
-  rb_define_singleton_method(rb_cFastUnderscore, "underscore", rb_str_underscore, 1);
+  rb_define_singleton_method(rb_cFastUnderscore, "underscore", fast_underscore, 1);
+  rb_define_method(rb_cString, "underscore", underscore, 0);
 }