RubyGems - jekyll-rp_logs - Versions diffs - 0.1.6 → 0.2.0 - Mend

jekyll-rp_logs 0.1.6 → 0.2.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (26)

checksums.yaml +4 -4
data/.codeclimate.yml +46 -0
data/.gitignore +6 -0
data/.rspec +2 -0
data/.rubocop.yml +2 -0
data/.themes/default/source/_config.yml.default +13 -3
data/.travis.yml +9 -0
data/CHANGELOG.md +55 -0
data/README.md +85 -32
data/Rakefile +42 -5
data/jekyll-rp_logs.gemspec +6 -1
data/lib/jekyll/rp_logs/parse_irssi_xchat.rb +35 -37
data/lib/jekyll/rp_logs/parse_mirc.rb +40 -42
data/lib/jekyll/rp_logs/parse_skype_12hour.rb +33 -45
data/lib/jekyll/rp_logs/parse_skype_24hour.rb +33 -41
data/lib/jekyll/rp_logs/parse_weechat.rb +36 -36
data/lib/jekyll/rp_logs/rp_arcs.rb +13 -16
data/lib/jekyll/rp_logs/rp_log_converter.rb +140 -117
data/lib/jekyll/rp_logs/rp_logline.rb +225 -0
data/lib/jekyll/rp_logs/rp_page.rb +63 -0
data/lib/jekyll/rp_logs/rp_parser.rb +9 -103
data/lib/jekyll/rp_logs/rp_tag_index.rb +18 -20
data/lib/jekyll/rp_logs/rp_tasks.rb +5 -6
data/lib/jekyll/rp_logs/version.rb +1 -1
data/lib/jekyll/rp_logs.rb +4 -5
metadata +53 -4

data/lib/jekyll/rp_logs/rp_log_converter.rb CHANGED Viewed

@@ -1,184 +1,208 @@
-require_relative 'rp_parser'
-require_relative 'rp_arcs'
-require_relative 'rp_tags'
+require_relative "rp_parser"
+require_relative "rp_logline"
+require_relative "rp_page"
+require_relative "rp_arcs"
+require_relative "rp_tags"
 module Jekyll
   module RpLogs
     # Consider renaming since it is more of a converter in practice
     class RpLogGenerator < Jekyll::Generator
       safe true
       priority :normal
-      RP_KEY = "rps"
+      @parsers = {}
+      class << self
+        attr_reader :parsers, :rp_key
-      @@parsers = {}
+        def add(parser)
+          @parsers[parser::FORMAT_STR] = parser
+        end
-      def RpLogGenerator.add(parser)
-        @@parsers[parser::FORMAT_STR] = parser
+        ##
+        # Extract global settings from the config file.
+        # The rp directory and collection name is pulled out; it must be the
+        # first collection defined.
+        def extract_settings(config)
+          @rp_key = config["collections"].keys[0].freeze
+        end
       end
       def initialize(config)
-        config['rp_convert'] ||= true
-      end
+        # Should actually probably complain if things are undefined or missing
+        config["rp_convert"] = true unless config.key? "rp_convert"
+        RpLogGenerator.extract_settings(config)
+        LogLine.extract_settings(config)
-      def skip_page(page, message)
-        @site.collections[RP_KEY].docs.delete page
-        print "\nSkipping #{page.path}: #{message}"
+        Jekyll.logger.info "Loaded jekyll-rp_logs #{RpLogs::VERSION}"
       end
-      def has_errors?(page)
-        # Verify that formats are specified
-        if page.data['format'].nil? || page.data['format'].length == 0 then
-          skip_page(page, "No formats specified")
-          return true
-        else
-          # Verify that the parser for each format exists
-          page.data['format'].each { |format|
-            if @@parsers[format].nil? then
-              skip_page(page, "Format #{format} does not exist.")
-              return true
-            end
-          }
-        end
+      def generate(site)
+        return unless site.config["rp_convert"]
-        # Verify that tags exist
-        if page.data['rp_tags'].nil? then
-          skip_page(page, "No tags specified")
-          return true
-        # Verify that arc names are in the proper format
-        elsif not (page.data['arc_name'].nil? || page.data['arc_name'].respond_to?('each')) then
-          skip_page(page, "arc_name must be blank or a YAML list")
-          return true
-        end
+        main_index, arc_index = extract_indexes(site)
-        false
+        # Pull out all the pages that are error-free
+        rp_pages = extract_valid_rps(site)
+        convert_all_pages(site, main_index, arc_index, rp_pages)
       end
-      def generate(site)
-        return unless site.config['rp_convert']
-        @site = site
+      private
+      ##
+      # Convenience method for accessing the collection key name
+      def rp_key
+        self.class.rp_key
+      end
+      ##
+      #
+      def extract_indexes(site)
         # Directory of RPs
-        index = site.pages.detect { |page| page.data['rp_index'] }
-        index.data['rps'] = {'canon' => [], 'noncanon' => []}
+        main_index = site.pages.find { |page| page.data["rp_index"] }
+        main_index.data["rps"] = { "canon" => [], "noncanon" => [] }
         # Arc-style directory
-        arc_page = site.pages.detect { |page| page.data['rp_arcs'] }
+        arc_index = site.pages.find { |page| page.data["rp_arcs"] }
-        site.data['menu_pages'] = [index, arc_page]
+        site.data["menu_pages"] = [main_index, arc_index]
+      end
+      ##
+      # Returns a list of RpLogs::Page objects that are error-free.
+      def extract_valid_rps(site)
+        site.collections[rp_key].docs.map { |p| RpLogs::Page.new(p) }
+          .reject do |p|
+            message = p.errors?(self.class.parsers)
+            skip_page(site, p, message) if message
+            message
+          end
+      end
+      def convert_all_pages(site, main_index, arc_index, rp_pages)
         arcs = Hash.new { |hash, key| hash[key] = Arc.new(key) }
         no_arc_rps = []
         # Convert all of the posts to be pretty
         # Also build up our hash of tags
-        site.collections[RP_KEY].docs.select { true }
-          .each { |page|
-            # because we're iterating over a selected array, we can delete from the original
-            begin
-              next if has_errors? page
-              page.data['rp_tags'] = page.data['rp_tags'].split(',').map { |t| Tag.new t }
-              # Skip if something goes wrong
-              next unless convertRp page
-              key = if page.data['canon'] then 'canon' else 'noncanon' end
-              # Add key for canon/noncanon
-              index.data['rps'][key] << page
-              # Add tag for canon/noncanon
-              page.data['rp_tags'] << (Tag.new key)
-              page.data['rp_tags'].sort!
-              arc_name = page.data['arc_name']
-              if arc_name then
-                arc_name.each { |n| arcs[n] << page }
-              else
-                no_arc_rps << page
-              end
-            rescue
-              # Catch all for any other exception encountered when parsing a page
-              skip_page(page, "Error parsing #{page.path}: " + $!.inspect)
-              # Raise exception, so Jekyll prints backtrace if run with --trace
-              raise $!
+        rp_pages.each do |page|
+          begin
+            # Skip if something goes wrong
+            next unless convert_rp(site, page)
+            key = page[:canon] ? "canon" : "noncanon"
+            # Add key for canon/noncanon
+            main_index.data["rps"][key] << page
+            # Add tag for canon/noncanon
+            page[:rp_tags] << (Tag.new key)
+            page[:rp_tags].sort!
+            arc_name = page[:arc_name]
+            if arc_name && !arc_name.empty?
+              arc_name.each { |n| arcs[n] << page }
+            else
+              no_arc_rps << page
             end
-          }
-        arcs.each_key { |key| sort_chronologically! arcs[key].rps }
-        combined_rps = no_arc_rps.map { |x| ['rp', x] } + arcs.values.map { |x| ['arc', x] }
-        combined_rps.sort_by! { |type,x|
+            Jekyll.logger.info "Converted #{page.basename}"
+          rescue
+            # Catch all for any other exception encountered when parsing a page
+            skip_page(site, page, "Error parsing #{page.basename}: #{$ERROR_INFO.inspect}")
+            # Raise exception, so Jekyll prints backtrace if run with --trace
+            raise $ERROR_INFO
+          end
+        end
+        arcs.each_key { |key| sort_chronologically! arcs[key].rps }
+        combined_rps = no_arc_rps.map { |x| ["rp", x] } + arcs.values.map { |x| ["arc", x] }
+        combined_rps.sort_by! { |type, x|
           case type
-          when 'rp'
-            x.data['start_date']
-          when 'arc'
-            x.start_date
+          when "rp"
+            x[:time_line] || x[:start_date]
+          when "arc"
+            x.start_date
           end
         }.reverse!
-        arc_page.data['rps'] = combined_rps
+        arc_index.data["rps"] = combined_rps
-        sort_chronologically! index.data['rps']['canon']
-        sort_chronologically! index.data['rps']['noncanon']
+        sort_chronologically! main_index.data["rps"]["canon"]
+        sort_chronologically! main_index.data["rps"]["noncanon"]
       end
-      def sort_chronologically!(pages)
-        pages.sort_by! { |p| p.data['start_date'] }.reverse!
+      def sort_chronologically!(pages)
+        # Check pages for invalid time_line value
+        pages.each do |p|
+          if p[:time_line] && !p[:time_line].is_a?(Date)
+            Jekyll.logger.error "Malformed time_line #{p[:time_line]} in file #{p.path}"
+            fail "Malformed time_line date"
+          end
+        end
+        # Sort pages by time_line if present or start_date otherwise
+        pages.sort_by! { |p| p[:time_line] || p[:start_date] }.reverse!
       end
-      def convertRp(page)
-        options = get_options page
+      def convert_rp(site, page)
+        options = page.options
         compiled_lines = []
-        page.content.each_line { |raw_line|
-          page.data['format'].each { |format|
-            log_line = @@parsers[format].parse_line(raw_line, options)
-            if log_line then
-              compiled_lines << log_line
+        page.content.each_line { |raw_line|
+          page[:format].each { |format|
+            log_line = self.class.parsers[format].parse_line(raw_line, options)
+            if log_line
+              compiled_lines << log_line
               break
             end
           }
         }
-        if compiled_lines.length == 0 then
-          skip_page(page, "No lines were matched by any format.")
+        if compiled_lines.length == 0
+          skip_page(site, page, "No lines were matched by any format.")
           return false
         end
         merge_lines! compiled_lines
         stats = extract_stats compiled_lines
-        split_output = compiled_lines.map { |line| line.output }
+        split_output = compiled_lines.map(&:output)
         page.content = split_output.join("\n")
-        if page.data['infer_char_tags'] then
+        if page[:infer_char_tags]
           # Turn the nicks into characters
-          nick_tags = stats[:nicks].map! { |n| Tag.new('char:' + n) }
-          page.data['rp_tags'] = (nick_tags.merge page.data['rp_tags']).to_a.sort
+          nick_tags = stats[:nicks].map! { |n| Tag.new("char:" + n) }
+          page[:rp_tags] = (nick_tags.merge page[:rp_tags]).to_a.sort
         end
-        page.data['end_date'] = stats[:end_date]
-        page.data['start_date'] ||= stats[:start_date]
+        page[:end_date] = stats[:end_date]
+        page[:start_date] ||= stats[:start_date]
         true
       end
-      def get_options(page)
-        { :strict_ooc => page.data['strict_ooc'],
-          :merge_text_into_rp => page.data['merge_text_into_rp'] }
+      ##
+      # Skip the page. Removes it from the site collection, and outputs a
+      # warning message saying it was skipped with the given reason.
+      def skip_page(site, page, message)
+        site.collections[rp_key].docs.delete page.page
+        Jekyll.logger.warn "Skipping #{page.basename}: #{message}"
       end
+      ##
+      # Consider moving this into Parser or RpLogs::Page
+      # It doesn't really belong here
       def merge_lines!(compiled_lines)
         last_line = nil
-        compiled_lines.reject! { |line|
-          if last_line == nil then
+        compiled_lines.reject! { |line|
+          if last_line.nil?
             last_line = line
             false
-          elsif last_line.mergeable_with? line then
+          elsif last_line.mergeable_with? line
             last_line.merge! line
-            # Delete the current line from output and maintain last_line
+            # Delete the current line from output and maintain last_line
             # in case we need to merge multiple times.
-            true
+            true
           else
             last_line = line
             false
@@ -186,17 +210,16 @@ module Jekyll
         }
       end
-      def extract_stats(compiled_lines)
+      def extract_stats(compiled_lines)
         nicks = Set.new
-        compiled_lines.each { |line|
+        compiled_lines.each { |line|
           nicks << line.sender if line.output_type == :rp
         }
-        { :nicks => nicks,
-          :end_date => compiled_lines[-1].timestamp,
-          :start_date => compiled_lines[0].timestamp }
+        { nicks: nicks,
+          end_date: compiled_lines[-1].timestamp,
+          start_date: compiled_lines[0].timestamp }
       end
     end
   end
-end
+end

data/lib/jekyll/rp_logs/rp_logline.rb ADDED Viewed

@@ -0,0 +1,225 @@
+require "cgi"
+module Jekyll
+  module RpLogs
+    class LogLine
+      RP_FLAG = "!RP".freeze
+      OOC_FLAG = "!OOC".freeze
+      MERGE_FLAG = "!MERGE".freeze
+      SPLIT_FLAG = "!SPLIT".freeze
+      attr_reader :timestamp, :mode, :sender, :contents, :flags
+      # Some things depend on the original type of the line (nick format)
+      attr_reader :base_type, :output_type
+      attr_reader :options
+      # Timestamp of the most recent line this line was merged with, to allow
+      # merging consecutive lines each MAX_SECONDS_BETWEEN_POSTS apart
+      attr_reader :last_merged_timestamp
+      # The max number of seconds between two lines that can still be merged
+      @max_seconds_between_posts = 3
+      # All characters that can denote the beginning of an OOC line
+      @ooc_start_delimiters = "([".freeze
+      class << self
+        attr_reader :ooc_start_delimiters, :max_seconds_between_posts
+        def extract_settings(config)
+          @max_seconds_between_posts = config.fetch("max_seconds_between_posts",
+                                                    @max_seconds_between_posts)
+          @ooc_start_delimiters = config.fetch("ooc_start_delimiters",
+                                               @ooc_start_delimiters).freeze
+        end
+      end
+      def initialize(timestamp, options = {}, sender:, contents:, flags:, type:, mode: " ")
+        @timestamp = timestamp
+        # Initialize to be the same as @timestamp
+        @last_merged_timestamp = timestamp
+        @mode = mode
+        @sender = sender
+        @contents = contents
+        @flags = flags.split(" ")
+        @base_type = type
+        @output_type = type
+        @options = options
+        classify
+      end
+      ##
+      # Set derived properties of this LogLine based on various options
+      private def classify
+        # This makes it RP by default
+        @output_type = :rp if @options[:strict_ooc]
+        # Check the contents for leading ( or [
+        @output_type = :ooc if ooc_start_delimiters.include? @contents.strip[0]
+        # Flags override our assumptions, always
+        if @flags.include? RP_FLAG
+          @output_type = :rp
+        elsif @flags.include? OOC_FLAG
+          @output_type = :ooc
+        end
+        # TODO: Containing both flags should result in a warning
+      end
+      def output
+        tag_open, tag_close = output_tags
+        # Escape any HTML special characters in the input
+        escaped_content = CGI.escapeHTML(@contents)
+        "#{tag_open}#{output_timestamp}#{output_sender} #{escaped_content}#{tag_close}"
+      end
+      def output_timestamp
+        # String used for the timestamp anchors
+        anchor = @timestamp.strftime("%Y-%m-%d_%H:%M:%S")
+        # String used when hovering over timestamps (friendly long-form)
+        title = @timestamp.strftime("%H:%M:%S %B %-d, %Y")
+        # String actually displayed on page
+        display = @timestamp.strftime("%H:%M")
+        "<a name=\"#{anchor}\" title=\"#{title}\" href=\"##{anchor}\">#{display}</a>"
+      end
+      def output_sender
+        case @base_type
+        when :rp
+          return "  * #{@sender}"
+        when :ooc
+          return " &lt;#{@mode}#{@sender}&gt;"
+        else
+          # Explode.
+          fail "No known type: #{@base_type}"
+        end
+      end
+      def output_tags
+        tag_class =
+          case @output_type
+          when :rp then "rp"
+          when :ooc then "ooc"
+          else # Explode.
+            fail "No known type: #{@output_type}"
+          end
+        tag_open = "<p class=\"#{tag_class}\">"
+        tag_close = "</p>"
+        [tag_open, tag_close]
+      end
+      ##
+      # Check if this line can be merged with the given line. In order to be
+      # merged, the two lines must fulfill the following requirements:
+      #
+      # * The timestamp difference is >= 0 and <= MAX_SECONDS_BETWEEN POSTS
+      #   (close_enough_timestamps?)
+      # * The lines have the same sender (same_sender?)
+      # * The first line has output_type :rp (rp?)
+      # * The next line has output_type :rp OR the sender has been specified
+      #   as someone who splits to normal text
+      #
+      # Exceptions:
+      # * If the next line has the SPLIT flag, it will never be merged
+      # * If the next line has the MERGE flag, it will always be merged
+      def mergeable_with?(next_line)
+        # Perform the checks for the override flags
+        return true if next_line.merge_flag?
+        return false if next_line.split_flag?
+        mergeable_ignoring_flags?(next_line)
+      end
+      ##
+      # Does all the rest of the checks that don't have to do with the
+      # override flags SPLIT_FLAG and MERGE_FLAG.
+      private def mergeable_ignoring_flags?(next_line)
+        close_enough_timestamps?(next_line) &&
+          same_sender?(next_line) &&
+          rp? &&
+          (next_line.rp? || next_line.possible_split_to_normal_text?)
+      end
+      def merge!(next_line)
+        @contents += "#{space_between_lines}#{next_line.contents}"
+        @last_merged_timestamp = next_line.timestamp
+        self
+      end
+      ##
+      # Returns "" if the sender has been said to split by characters.
+      # Returns " " otherwise.
+      #
+      # When the sender splits by characters, adding a space will put spaces in
+      # the middle of words. Their spaces will be preserved at the end of lines.
+      private def space_between_lines
+        if options[:splits_by_character] &&
+           options[:splits_by_character].include?(@sender)
+          ""
+        else
+          " "
+        end
+      end
+      ##
+      # Returns true if this line has the output_type :rp
+      def rp?
+        @output_type == :rp
+      end
+      def split_flag?
+        @flags.include? SPLIT_FLAG
+      end
+      def merge_flag?
+        @flags.include? MERGE_FLAG
+      end
+      ##
+      # Return true if this sender splits to normal text, and the line base
+      # type was OOC. This allows you to force a quick text post not to merge
+      # by flagging it !OOC.
+      #
+      # Only merge if the base type was OOC... otherwise you couldn't force not merging
+      # Maybe a job for !NOTMERGE flag, or similar
+      protected def possible_split_to_normal_text?
+        base_type == :ooc && @options[:merge_text_into_rp] &&
+          @options[:merge_text_into_rp].include?(@sender)
+      end
+      def inspect
+        "<#{@mode}#{@sender}> (#{@base_type} -> #{@output_type}) #{@contents}"
+      end
+      private
+      ##
+      # Only merge posts close enough in time
+      # The difference in time between the post merged into this one, and
+      # the next post, must be less than the limit (and non-negative)
+      def close_enough_timestamps?(next_line)
+        time_diff = (next_line.timestamp - @last_merged_timestamp) * 24 * 60 * 60
+        time_diff >= 0 && time_diff <= max_seconds_between_posts
+      end
+      ##
+      # Returns if these lines have the same sender
+      def same_sender?(next_line)
+        @sender == next_line.sender
+      end
+      ##
+      # Convenience methods for accessing class instance variables
+      def max_seconds_between_posts
+        self.class.max_seconds_between_posts
+      end
+      def ooc_start_delimiters
+        self.class.ooc_start_delimiters
+      end
+    end
+  end
+end

data/lib/jekyll/rp_logs/rp_page.rb ADDED Viewed

@@ -0,0 +1,63 @@
+module Jekyll
+  module RpLogs
+    class Page
+      extend Forwardable
+      def_delegators :@page, :basename, :content, :content=, :path, :to_liquid
+      # Jekyll::Page object
+      attr_reader :page
+      def initialize(page)
+        @page = page
+        # If the tags exist, try to convert them to a list of Tag objects
+        self[:rp_tags] &&= self[:rp_tags].split(",").map { |t| Tag.new t }
+      end
+      ##
+      # Pass the request along to the page's data hash, and allow symbols to be
+      # used by converting them to strings first.
+      def [](key)
+        @page.data[key.to_s]
+      end
+      def []=(key, value)
+        @page.data[key.to_s] = value
+      end
+      ##
+      # Check this page for errors, using the provided list of supported parse
+      # formats
+      #
+      # Returns false if there is no error
+      # Returns error_message if there is an error
+      def errors?(supported_formats)
+        # Verify that formats are specified
+        if self[:format].nil? || self[:format].empty?
+          return "No formats specified"
+        end
+        # Verify that the parser for each format exists
+        self[:format].each do |format|
+          return "Format #{format} does not exist." unless supported_formats[format]
+        end
+        # Verify that tags exist
+        return "No tags specified" if self[:rp_tags].nil?
+        # Verify that arc names are in the proper format
+        if self[:arc_name] && !self[:arc_name].respond_to?("each")
+          return "arc_name must be blank or a YAML list"
+        end
+        false
+      end
+      def options
+        { strict_ooc: self[:strict_ooc],
+          merge_text_into_rp: self[:merge_text_into_rp],
+          splits_by_character: self[:splits_by_character] }
+      end
+    end
+  end
+end