fluent-plugin-k8s-metrics-agg 1.1.0 → 1.1.1

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 60991c52f3ffacbee65a82a67ccb02903417800ffd74287c0fbf322aa03a318e
4
- data.tar.gz: 4dd9cf5ae9ee3699d27efd4495c8de0a493f8f9ed536fe03b9ee3dcaf269f8fe
3
+ metadata.gz: dafdc24e3159101b532162d528800ee781dc5a10c62824a97cc3b5506c977ae0
4
+ data.tar.gz: d40d159adcac648fee03f13fe88697c5491208ef0557adc4074726be50d88eaa
5
5
  SHA512:
6
- metadata.gz: 2debd68c82b5d490f1d0b73b304bcf49a39555d24f96473845a21358490fb10bef343775d26963cbc652f6f8c72515931a0a7d2f0021ccc28efdb1271b8bec73
7
- data.tar.gz: ad770e300cf2ec07a572c73650b1a06265bb8b0c8d59338535ed45ca4a44655c3dfaecc7acf578233e0273a363e66f80e38cb0710a7e79b358b538300368adb3
6
+ metadata.gz: 7ace62d329f0b1f316563d758e9a7ca045914d37811606ce6e07f9fb807c2b3cf907d3dd025983356097c813308b191b77677ddbfbbba56d50fe9aa5c7744786
7
+ data.tar.gz: 456d5c39db1fc1dc0d159c920dd612dc39f474b6d873b3963ee42dbbc7ade903079b113b844c3ef16e2830b545f583f3383caafccec42943f3d96ed0f6d72c4e
data/.circleci/build_and_push.sh CHANGED
@@ -1,9 +1,10 @@
1
1
  #!/usr/bin/env bash
2
2
  set -e
3
+ FLUENTD_HEC_GEM_VERSION=`cat docker/FLUENTD_HEC_GEM_VERSION`
3
4
  aws ecr get-login --region $AWS_REGION --no-include-email | bash
4
5
  echo "Building docker image..."
5
6
  cp /tmp/pkg/fluent-plugin-k8s-metrics-agg-*.gem docker
6
- docker build --build-arg VERSION=$FLUENT_SPLUNK_HEC_GEM_VERSION --no-cache -t splunk/fluent-plugin-k8s-metrics-agg:metrics-aggregator ./docker
7
+ docker build --build-arg VERSION=$FLUENTD_HEC_GEM_VERSION --no-cache -t splunk/fluent-plugin-k8s-metrics-agg:metrics-aggregator ./docker
7
8
  docker tag splunk/fluent-plugin-k8s-metrics-agg:metrics-aggregator $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/k8s-ci-metrics-agg:latest
8
9
  echo "Push docker image to ecr..."
9
10
  docker push $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/k8s-ci-metrics-agg:latest | awk 'END{print}'
data/.circleci/build_and_push_to_dockerhub.sh CHANGED
@@ -1,9 +1,10 @@
1
1
  #!/usr/bin/env bash
2
2
  set -e
3
+ FLUENTD_HEC_GEM_VERSION=`cat docker/FLUENTD_HEC_GEM_VERSION`
3
4
  echo "Building docker image..."
4
5
  cp /tmp/pkg/fluent-plugin-k8s-metrics-agg-*.gem docker
5
6
  VERSION=`cat VERSION`
6
- docker build --build-arg VERSION=$VERSION --no-cache -t splunk/fluent-plugin-k8s-metrics-agg:metrics-aggregator ./docker
7
+ docker build --build-arg VERSION=$FLUENTD_HEC_GEM_VERSION --no-cache -t splunk/fluent-plugin-k8s-metrics-agg:metrics-aggregator ./docker
7
8
  docker tag splunk/fluent-plugin-k8s-metrics-agg:metrics-aggregator splunk/${DOCKERHUB_REPO_NAME}:${VERSION}
8
9
  echo "Push docker image to splunk dockerhub..."
9
10
  docker login --username=$DOCKERHUB_ACCOUNT_ID --password=$DOCKERHUB_ACCOUNT_PASS
data/.circleci/check_version_trigger_release.sh ADDED
@@ -0,0 +1,26 @@
1
+ #!/usr/bin/env bash
2
+ set -e
3
+
4
+ #!/usr/bin/env bash
5
+ LATEST_COMMIT=$(git rev-parse HEAD)
6
+ VERSION_COMMIT=$(git log -1 --format=format:%H VERSION)
7
+ if [ $VERSION_COMMIT = $LATEST_COMMIT ];
8
+ then
9
+ if [ -s VERSION ] # Check if content is empty
10
+ then
11
+ VERSION=`cat VERSION`
12
+ echo "VERSION is changed to $VERSION"
13
+ else
14
+ echo "[ERROR] VERSION file is empty."
15
+ exit 1
16
+ fi
17
+ git config user.email "splunk-oss-admin@splunk.com"
18
+ git config user.name "splunk-oss-admin"
19
+ git checkout develop
20
+ git pull origin develop
21
+ git checkout -b release/$VERSION origin/develop
22
+ git push https://$RELEASE_GITHUB_USER:$RELEASE_GITHUB_PASS@github.com/splunk/fluent-plugin-k8s-metrics-agg.git release/$VERSION
23
+ git checkout master
24
+ git merge --no-edit release/$VERSION
25
+ git push https://$RELEASE_GITHUB_USER:$RELEASE_GITHUB_PASS@github.com/splunk/fluent-plugin-k8s-metrics-agg.git master
26
+ fi
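
Note: the new check_version_trigger_release.sh only cuts a release branch when the most recent commit is the one that last touched VERSION and that file is non-empty. As an illustrative sketch only (not part of the gem), the same check could be written in Ruby by shelling out to the same git commands:

    # Illustrative sketch of the trigger logic above: release only when HEAD is
    # the commit that last modified VERSION and VERSION has content.
    latest_commit  = `git rev-parse HEAD`.strip
    version_commit = `git log -1 --format=format:%H VERSION`.strip

    if latest_commit == version_commit
      version = File.read('VERSION').strip
      abort '[ERROR] VERSION file is empty.' if version.empty?
      puts "VERSION is changed to #{version}"
      # branch creation and pushes to release/#{version} and master follow here
    end
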
data/.circleci/config.yml CHANGED
@@ -82,12 +82,34 @@ jobs:
82
82
  name: Upload gem to Github
83
83
  command: |
84
84
  .circleci/build_and_push_to_github_release.sh
85
+ - run:
86
+ name: Upload gem to Ruby Gem
87
+ command: |
88
+ .circleci/push_gems_to_rubygems.sh
89
+
90
+ check_version:
91
+ docker:
92
+ - image: circleci/ruby:2.6.1-node-browsers
93
+ working_directory: ~/repo
94
+ steps:
95
+ - attach_workspace:
96
+ at: /tmp
97
+ - checkout
98
+ - setup_remote_docker:
99
+ reusable: true
100
+ - run:
101
+ name: Check VERSION file for change
102
+ command: |
103
+ .circleci/check_version_trigger_release.sh
85
104
 
86
105
  workflows:
87
106
  version: 2
88
107
  build_test_push:
89
108
  jobs:
90
- - build
109
+ - build:
110
+ filters:
111
+ branches:
112
+ ignore: /^release\/.*/
91
113
  - test:
92
114
  requires:
93
115
  - build
@@ -97,9 +119,15 @@ workflows:
97
119
  filters:
98
120
  branches:
99
121
  only: develop
100
- - release:
122
+ - check_version:
101
123
  requires:
102
- - test
124
+ - push
125
+ release:
126
+ jobs:
127
+ - build:
103
128
  filters:
104
129
  branches:
105
- only: master
130
+ only: master
131
+ - release:
132
+ requires:
133
+ - build
data/.circleci/gem_credentials ADDED
@@ -0,0 +1,2 @@
1
+ ---
2
+ :rubygems_api_key: __RUBYGEMS_API_KEY__
data/.circleci/install_dep.sh CHANGED
@@ -1,5 +1,6 @@
1
1
  #!/usr/bin/env bash
2
2
  set -e
3
+ sudo apt-get update
3
4
  sudo apt-get install -y python-pip libpython-dev > /dev/null 2>&1
4
5
  echo "Installing aws cli..."
5
6
  sudo pip install awscli > /dev/null 2>&1
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env bash
2
+ set -e
3
+ echo "Pushing metrics aggr gem to rubygems.org..."
4
+ echo "gem `gem --version`"
5
+ cat .circleci/gem_credentials | sed -e "s/__RUBYGEMS_API_KEY__/${RUBYGEMS_API_KEY}/" > ~/.gem/credentials
6
+ chmod 0600 ~/.gem/credentials
7
+ gem push /tmp/pkg/fluent-plugin-k8s-metrics-agg-*.gem
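
Note: the script fills in the .circleci/gem_credentials template (added above) by substituting the __RUBYGEMS_API_KEY__ placeholder with the RUBYGEMS_API_KEY environment variable before calling gem push. For illustration only, an equivalent of that sed step in Ruby:

    # Illustrative Ruby equivalent of the `sed` templating step: substitute the
    # API key placeholder and write the result where `gem push` looks for it.
    require 'fileutils'

    template         = File.read('.circleci/gem_credentials')
    credentials_path = File.expand_path('~/.gem/credentials')

    FileUtils.mkdir_p(File.dirname(credentials_path))
    File.write(credentials_path,
               template.sub('__RUBYGEMS_API_KEY__', ENV.fetch('RUBYGEMS_API_KEY')))
    File.chmod(0o600, credentials_path)
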
data/.github/ISSUE_TEMPLATE/bug_report.md ADDED
@@ -0,0 +1,29 @@
1
+ ---
2
+ name: Bug report
3
+ about: Report a bug encountered while operating fluent-plugin-k8s-metrics-agg
4
+ title: ''
5
+ labels: ''
6
+ assignees: ''
7
+
8
+ ---
9
+
10
+ <!-- Please use this template while reporting a bug and provide as much info as possible. Not doing so may result in your bug not being addressed in a timely manner. Thanks!
11
+
12
+ Please do not report security vulnerabilities with public GitHub issue reports. Please report security issues here: https://www.splunk.com/goto/report_vulnerabilities_prodsec
13
+ -->
14
+
15
+
16
+ **What happened**:
17
+
18
+ **What you expected to happen**:
19
+
20
+ **How to reproduce it (as minimally and precisely as possible)**:
21
+
22
+ **Anything else we need to know?**:
23
+
24
+ **Environment**:
25
+ - Kubernetes version (use `kubectl version`):
26
+ - Ruby version (use `ruby --version`):
27
+ - OS (e.g: `cat /etc/os-release`):
28
+ - Splunk version:
29
+ - Others:
data/.github/ISSUE_TEMPLATE/enhancement_request.md ADDED
@@ -0,0 +1,14 @@
1
+ ---
2
+ name: Enhancement Request
3
+ about: Suggest an enhancement to the fluent-plugin-k8s-metrics-agg project
4
+ title: ''
5
+ labels: ''
6
+ assignees: ''
7
+
8
+ ---
9
+
10
+ <!-- Please only use this template for submitting enhancement requests -->
11
+
12
+ **What would you like to be added**:
13
+
14
+ **Why is this needed**:
data/.github/ISSUE_TEMPLATE/failing_test.md ADDED
@@ -0,0 +1,18 @@
1
+ ---
2
+ name: Failing Test
3
+ about: Report test failures in fluent-plugin-k8s-metrics-agg
4
+ title: ''
5
+ labels: ''
6
+ assignees: ''
7
+
8
+ ---
9
+
10
+ <!-- Please only use this template for submitting reports about failing tests -->
11
+
12
+ **Which test(s) are failing**:
13
+
14
+ **Since when has it been failing**:
15
+
16
+ **Reason for failure**:
17
+
18
+ **Anything else we need to know**:
data/.github/PULL_REQUEST_TEMPLATE.md ADDED
@@ -0,0 +1,23 @@
1
+ ## Proposed changes
2
+
3
+ Describe the big picture of your changes here to communicate to the maintainers why we should accept this pull request. If it fixes a bug or resolves a feature request, be sure to link to that issue.
4
+
5
+ ## Types of changes
6
+
7
+ What types of changes does your code introduce?
8
+ _Put an `x` in the boxes that apply_
9
+
10
+ - [ ] Bugfix (non-breaking change which fixes an issue)
11
+ - [ ] New feature (non-breaking change which adds functionality)
12
+ - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
13
+
14
+ ## Checklist
15
+
16
+ _Put an `x` in the boxes that apply._
17
+
18
+ - [ ] I have read the [CONTRIBUTING](https://github.com/splunk/fluent-plugin-k8s-metrics-agg/blob/develop/CONTRIBUTING.md) doc
19
+ - [ ] I have read the [CLA](https://github.com/splunk/fluent-plugin-k8s-metrics-agg/blob/develop/CLA.md)
20
+ - [ ] I have added tests that prove my fix is effective or that my feature works
21
+ - [ ] I have added necessary documentation (if appropriate)
22
+ - [ ] Any dependent changes have been merged and published in downstream modules
23
+
data/CLA.md ADDED
@@ -0,0 +1,18 @@
1
+ By submitting a Contribution to this Work, You agree that Your Contribution is made subject to the primary LICENSE
2
+ file applicable to this Work. In addition, You represent that: (i) You are the copyright owner of the Contribution
3
+ or (ii) You have the requisite rights to make the Contribution.
4
+
5
+ Definitions:
6
+
7
+ “You” shall mean: (i) yourself if you are making a Contribution on your own behalf; or (ii) your company,
8
+ if you are making a Contribution on behalf of your company. If you are making a Contribution on behalf of your
9
+ company, you represent that you have the requisite authority to do so.
10
+
11
+ "Contribution" shall mean any original work of authorship, including any modifications or additions to an existing
12
+ work, that is intentionally submitted by You for inclusion in, or documentation of, this project/repository. For the
13
+ purposes of this definition, "submitted" means any form of electronic, verbal, or written communication submitted for
14
+ inclusion in this project/repository, including but not limited to communication on electronic mailing lists, source
15
+ code control systems, and issue tracking systems that are managed by, or on behalf of, the maintainers of
16
+ the project/repository.
17
+
18
+ “Work” shall mean the collective software, content, and documentation in this project/repository.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- fluent-plugin-k8s-metrics-agg (1.1.0)
4
+ fluent-plugin-k8s-metrics-agg (1.1.1)
5
5
  fluentd (~> 1.3.3)
6
6
  kubeclient (~> 4.2.2)
7
7
  multi_json (~> 1.13.1)
@@ -12,12 +12,12 @@ GEM
12
12
  specs:
13
13
  addressable (2.6.0)
14
14
  public_suffix (>= 2.0.2, < 4.0)
15
- cool.io (1.5.3)
15
+ cool.io (1.5.4)
16
16
  crack (0.4.3)
17
17
  safe_yaml (~> 1.0.0)
18
18
  dig_rb (1.0.1)
19
19
  docile (1.3.1)
20
- domain_name (0.5.20180417)
20
+ domain_name (0.5.20190701)
21
21
  unf (>= 0.0.5, < 1.0.0)
22
22
  fluentd (1.3.3)
23
23
  cool.io (>= 1.4.5, < 2.0.0)
@@ -36,6 +36,7 @@ GEM
36
36
  http-cookie (~> 1.0)
37
37
  http-form_data (~> 2.0)
38
38
  http_parser.rb (~> 0.6.0)
39
+ http-accept (1.7.0)
39
40
  http-cookie (1.0.3)
40
41
  domain_name (~> 0.5)
41
42
  http-form_data (2.1.1)
@@ -45,23 +46,24 @@ GEM
45
46
  http (~> 3.0)
46
47
  recursive-open-struct (~> 1.0, >= 1.0.4)
47
48
  rest-client (~> 2.0)
48
- mime-types (3.2.2)
49
+ mime-types (3.3)
49
50
  mime-types-data (~> 3.2015)
50
- mime-types-data (3.2018.0812)
51
- msgpack (1.2.6)
51
+ mime-types-data (3.2019.1009)
52
+ msgpack (1.3.1)
52
53
  multi_json (1.13.1)
53
54
  netrc (0.11.0)
54
- oj (3.7.9)
55
+ oj (3.7.12)
55
56
  power_assert (1.1.3)
56
57
  public_suffix (3.0.3)
57
58
  rake (12.3.2)
58
59
  recursive-open-struct (1.1.0)
59
- rest-client (2.0.2)
60
+ rest-client (2.1.0)
61
+ http-accept (>= 1.7.0, < 2.0)
60
62
  http-cookie (>= 1.0.2, < 2.0)
61
63
  mime-types (>= 1.16, < 4.0)
62
64
  netrc (~> 0.8)
63
65
  safe_yaml (1.0.4)
64
- serverengine (2.1.0)
66
+ serverengine (2.1.1)
65
67
  sigdump (~> 0.2.2)
66
68
  sigdump (0.2.4)
67
69
  simplecov (0.16.1)
@@ -75,11 +77,11 @@ GEM
75
77
  thread_safe (0.3.6)
76
78
  tzinfo (1.2.5)
77
79
  thread_safe (~> 0.1)
78
- tzinfo-data (1.2018.9)
80
+ tzinfo-data (1.2019.3)
79
81
  tzinfo (>= 1.0.0)
80
82
  unf (0.1.4)
81
83
  unf_ext
82
- unf_ext (0.0.7.5)
84
+ unf_ext (0.0.7.6)
83
85
  webmock (3.5.1)
84
86
  addressable (>= 2.3.6)
85
87
  crack (>= 0.3.2)
@@ -98,4 +100,4 @@ DEPENDENCIES
98
100
  webmock (~> 3.5.1)
99
101
 
100
102
  BUNDLED WITH
101
- 2.0.1
103
+ 2.0.2
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.1.0
1
+ 1.1.1
data/docker/FLUENTD_HEC_GEM_VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.1.2
@@ -355,7 +355,7 @@ module Fluent
355
355
  begin
356
356
  @client.discover unless @client.discovered
357
357
  @client.rest_client['/pods'].tap do |endpoint|
358
- log.info("Use URL #{endpoint.url} for scraping limits requests metrics")
358
+ log.debug("Use URL #{endpoint.url} for scraping limits requests metrics")
359
359
  end
360
360
  end
361
361
  end
@@ -363,6 +363,9 @@ module Fluent
363
363
  def scrape_limits_requests_metrics
364
364
  response = limits_requests_api.get(@client.headers)
365
365
  handle_limits_requests_res(response)
366
+ rescue StandardError => e
367
+ log.error "Failed to get limit metrics, error=#{$ERROR_INFO}, #{e.inspect}"
368
+ log.error_backtrace
366
369
  end
367
370
 
368
371
  # This method is used to handle responses from the kube apiserver api
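
Note: the same rescue guard is added to scrape_node_metrics and scrape_resource_usage_metrics below. These methods run from periodic timers, so rescuing at this level means one failed or timed-out API call is logged and skipped while later intervals keep running. A minimal, illustrative sketch of the idea (not the plugin's exact wiring):

    require 'English' # provides $ERROR_INFO, as used in the log lines above
    require 'logger'

    # Wrap a scrape body so a failure in one poll is logged and swallowed,
    # letting the next scheduled poll run normally.
    def scrape_with_logging(name, logger)
      yield
    rescue StandardError => e
      logger.error "Failed to get #{name} metrics, error=#{$ERROR_INFO}, #{e.inspect}"
    end

    scrape_with_logging('node', Logger.new($stdout)) { raise 'simulated timeout' }
    puts 'scrape loop continues'
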
@@ -375,7 +378,7 @@ module Fluent
375
378
  log.error "ExMultiJson.load(response.body) expected 2xx from summary API, but got #{response.code}. Response body = #{response.body}"
376
379
  end
377
380
  rescue StandardError => e
378
- log.error "Failed to scrape metrics, error=#{$ERROR_INFO}, #{e.inspect}"
381
+ log.error "Failed to scrape limit metrics, error=#{$ERROR_INFO}, #{e.inspect}"
379
382
  log.error_backtrace
380
383
  end
381
384
 
@@ -420,7 +423,7 @@ module Fluent
420
423
  pod_usage_metrics.add_usage_metrics(cpu_limit, cpu_request, memory_limit, memory_request)
421
424
  end
422
425
 
423
- pod_labels = { 'name' => pod_json['metadata']['name'], 'namespace' => pod_json['metadata']['name'], 'node' => pod_json['spec']['nodeName'] }
426
+ pod_labels = { 'name' => pod_json['metadata']['name'], 'namespace' => pod_json['metadata']['namespace'], 'node' => pod_json['spec']['nodeName'] }
424
427
  emit_limits_requests_metrics(generate_tag('pod'), @scraped_at, pod_labels, pod_usage_metrics)
425
428
  @@namespace_usage_metrics_map[pod_namespace].add_usage_metrics(pod_usage_metrics.instance_variable_get(:@cpu_limit).to_s + ('m'), pod_usage_metrics.instance_variable_get(:@cpu_request).to_s + ('m'),
426
429
  pod_usage_metrics.instance_variable_get(:@memory_limit).to_s + ('Mi'), pod_usage_metrics.instance_variable_get(:@memory_request).to_s + ('Mi'))
@@ -454,7 +457,7 @@ module Fluent
454
457
  begin
455
458
  @client.discover unless @client.discovered
456
459
  @client.rest_client['/nodes'].tap do |endpoint|
457
- log.info("Use URL #{endpoint.url} for scraping node metrics")
460
+ log.debug("Use URL #{endpoint.url} for scraping node metrics")
458
461
  end
459
462
  end
460
463
  end
@@ -462,6 +465,9 @@ module Fluent
462
465
  def scrape_node_metrics
463
466
  response = node_api.get(@client.headers)
464
467
  handle_node_response(response)
468
+ rescue StandardError => e
469
+ log.error "Failed to get node metrics, error=#{$ERROR_INFO}, #{e.inspect}"
470
+ log.error_backtrace
465
471
  end
466
472
 
467
473
  # This method is used to handle responses from the kubeapiserver api
@@ -474,7 +480,7 @@ module Fluent
474
480
  log.error "ExMultiJson.load(response.body) expected 2xx from summary API, but got #{response.code}. Response body = #{response.body}"
475
481
  end
476
482
  rescue StandardError => e
477
- log.error "Failed to scrape metrics, error=#{$ERROR_INFO}, #{e.inspect}"
483
+ log.error "Failed to scrape node metrics, error=#{$ERROR_INFO}, #{e.inspect}"
478
484
  log.error_backtrace
479
485
  end
480
486
 
@@ -527,7 +533,7 @@ module Fluent
527
533
  begin
528
534
  @client.discover unless @client.discovered
529
535
  @client.rest_client['/nodes'].tap do |endpoint|
530
- log.info("Use URL #{endpoint.url} for scraping node metrics")
536
+ log.debug("Use URL #{endpoint.url} for scraping node metrics")
531
537
  end
532
538
  end
533
539
  end
@@ -535,6 +541,9 @@ module Fluent
535
541
  def scrape_resource_usage_metrics
536
542
  response = resource_usage_api.get(@client.headers)
537
543
  handle_resource_usage_response(response)
544
+ rescue StandardError => e
545
+ log.error "Failed to get resource usage metrics, error=#{$ERROR_INFO}, #{e.inspect}"
546
+ log.error_backtrace
538
547
  end
539
548
 
540
549
  # This method is used to handle responses from the kubelet summary api
@@ -547,7 +556,7 @@ module Fluent
547
556
  log.error "ExMultiJson.load(response.body) expected 2xx from summary API, but got #{response.code}. Response body = #{response.body}"
548
557
  end
549
558
  rescue StandardError => e
550
- log.error "Failed to scrape metrics, error=#{$ERROR_INFO}, #{e.inspect}"
559
+ log.error "Failed to scrape resource usage metrics, error=#{$ERROR_INFO}, #{e.inspect}"
551
560
  log.error_backtrace
552
561
  end
553
562
 
@@ -560,13 +569,13 @@ module Fluent
560
569
  begin
561
570
  @client.discover unless @client.discovered
562
571
  @client.rest_client["/nodes/#{node_name}:#{@kubelet_port}/proxy/stats/summary"].tap do |endpoint|
563
- log.info("Use URL #{endpoint.url} for scraping resource usage metrics")
572
+ log.debug("Use URL #{endpoint.url} for scraping resource usage metrics")
564
573
  end
565
574
  end
566
575
 
567
576
  node_response = JSON.parse(node_rest_client.get(@client.headers))
568
577
  Array(node_response['pods']).each do |pod_json|
569
- pod_cpu_usage = pod_json['cpu']['usageNanoCores']
578
+ pod_cpu_usage = pod_json['cpu']['usageNanoCores'] / 1_000_000
570
579
  pod_memory_usage = pod_json['memory']['usageBytes']
571
580
  pod_namespace = pod_json['podRef']['namespace']
572
581
  pod_usage = ResourceUsageMetricsUnit.new
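
Note: the added division converts the kubelet summary value from nanocores to millicores before it is aggregated (1 core = 1,000,000,000 nanocores = 1,000 millicores), matching the 'm' suffix appended when per-pod usage is rolled up into namespace metrics. A quick sanity check of the arithmetic, assuming usageNanoCores is reported as documented by the summary API:

    # One full core reported by the summary API ...
    usage_nano_cores  = 1_000_000_000
    # ... becomes 1000 millicores, i.e. "1000m".
    usage_milli_cores = usage_nano_cores / 1_000_000
    puts usage_milli_cores # => 1000
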
@@ -37,33 +37,16 @@ module PluginTestHelper
37
37
  k8s_url + '/v1/nodes/generics-aws-node-three:10255/proxy/stats/summary'
38
38
  end
39
39
 
40
- def stub_api_port_10250
41
- WebMock.stub_request(:get, 'https://node.fakedestination.com:10250/api')
42
- .with(
43
- headers: {
44
- 'Accept' => '*/*',
45
- 'Accept-Encoding' => 'gzip, deflate',
46
- 'Host' => 'node.fakedestination.com:10250'
47
- }
48
- )
49
- .to_return(status: 200,
50
- body: File.open(File.expand_path('../v1.json', __FILE__)),
51
- headers: {})
52
- end
53
-
54
40
  def stub_api_port_10255
55
41
  WebMock.stub_request(:get,
56
42
  'https://node.fakedestination.com:10255/api')
57
43
  .with(
58
44
  headers: {
59
- 'Accept' => '*/*',
60
- 'Accept-Encoding' => 'gzip, deflate',
61
45
  'Host' => 'node.fakedestination.com:10255'
62
46
  }
63
47
  )
64
48
  .to_return(status: 200,
65
- body: File.open(File.expand_path('../v1.json', __FILE__)),
66
- headers: {})
49
+ body: File.open(File.expand_path('../v1.json', __FILE__)))
67
50
  end
68
51
 
69
52
  def stub_api_v1
@@ -71,29 +54,27 @@ module PluginTestHelper
71
54
  'https://node.fakedestination.com:10255/api/v1')
72
55
  .with(
73
56
  headers: {
74
- 'Accept' => '*/*',
75
- 'Accept-Encoding' => 'gzip, deflate',
76
57
  'Host' => 'node.fakedestination.com:10255'
77
58
  }
78
59
  )
79
60
  .to_return(status: 200,
80
- body: File.open(File.expand_path('../v1.json', __FILE__)),
81
- headers: {})
82
- end
61
+ body: File.open(File.expand_path('../v1.json', __FILE__)))
62
+ end
63
+
64
+ def stub_api_pods(timeout=false)
65
+ get_pods = WebMock.stub_request(:get,
66
+ 'https://node.fakedestination.com:10255/api/v1/pods')
67
+ .with(
68
+ headers: {
69
+ 'Host' => 'node.fakedestination.com:10255'
70
+ }
71
+ )
72
+ if timeout
73
+ get_pods = get_pods.to_timeout.then
74
+ end
83
75
 
84
- def stub_api_pods
85
- WebMock.stub_request(:get,
86
- 'https://node.fakedestination.com:10255/api/v1/pods')
87
- .with(
88
- headers: {
89
- 'Accept' => '*/*',
90
- 'Accept-Encoding' => 'gzip, deflate',
91
- 'Host' => 'node.fakedestination.com:10255'
92
- }
93
- )
94
- .to_return(status: 200,
95
- body: File.open(File.expand_path('../pods.json', __FILE__)),
96
- headers: {})
76
+ get_pods.to_return(status: 200,
77
+ body: File.open(File.expand_path('../pods.json', __FILE__)))
97
78
  end
98
79
 
99
80
  def stub_api_node_1
@@ -101,29 +82,28 @@ module PluginTestHelper
101
82
  'https://node.fakedestination.com:10255/api/v1/nodes/generics-aws-node-one:10255/proxy/stats/summary')
102
83
  .with(
103
84
  headers: {
104
- 'Accept' => '*/*',
105
- 'Accept-Encoding' => 'gzip, deflate',
106
85
  'Host' => 'node.fakedestination.com:10255'
107
86
  }
108
87
  )
109
88
  .to_return(status: 200,
110
- body: File.open(File.expand_path('../node1.json', __FILE__)),
111
- headers: {})
89
+ body: File.open(File.expand_path('../node1.json', __FILE__)))
112
90
  end
113
91
 
114
- def stub_api_node_2
115
- WebMock.stub_request(:get,
116
- 'https://node.fakedestination.com:10255/api/v1/nodes/generics-aws-node-two:10255/proxy/stats/summary')
92
+ def stub_api_node_2(timeout=false)
93
+ get_node_summary = WebMock.stub_request(:get,
94
+ 'https://node.fakedestination.com:10255/api/v1/nodes/generics-aws-node-two:10255/proxy/stats/summary')
117
95
  .with(
118
96
  headers: {
119
- 'Accept' => '*/*',
120
- 'Accept-Encoding' => 'gzip, deflate',
121
97
  'Host' => 'node.fakedestination.com:10255'
122
98
  }
123
99
  )
124
- .to_return(status: 200,
125
- body: File.open(File.expand_path('../node2.json', __FILE__)),
126
- headers: {})
100
+
101
+ if timeout
102
+ get_node_summary = get_node_summary.to_timeout
103
+ end
104
+
105
+ get_node_summary.to_return(status: 200,
106
+ body: File.open(File.expand_path('../node2.json', __FILE__)))
127
107
  end
128
108
 
129
109
  def stub_api_node_3
@@ -131,40 +111,45 @@ module PluginTestHelper
131
111
  'https://node.fakedestination.com:10255/api/v1/nodes/generics-aws-node-three:10255/proxy/stats/summary')
132
112
  .with(
133
113
  headers: {
134
- 'Accept' => '*/*',
135
- 'Accept-Encoding' => 'gzip, deflate',
136
114
  'Host' => 'node.fakedestination.com:10255'
137
115
  }
138
116
  )
139
117
  .to_return(status: 200,
140
- body: File.open(File.expand_path('../node3.json', __FILE__)),
141
- headers: {})
118
+ body: File.open(File.expand_path('../node3.json', __FILE__)))
142
119
  end
143
120
 
144
- def stub_api_nodes
145
- WebMock.stub_request(:get,
146
- 'https://node.fakedestination.com:10255/api/v1/nodes')
147
- .with(
148
- headers: {
149
- 'Accept' => '*/*',
150
- 'Accept-Encoding' => 'gzip, deflate',
151
- 'Host' => 'node.fakedestination.com:10255'
152
- }
153
- )
154
- .to_return(status: 200,
155
- body: File.open(File.expand_path('../nodes.json', __FILE__)),
156
- headers: {})
121
+ def stub_api_nodes(timeout=false)
122
+ get_nodes = WebMock.stub_request(:get, 'https://node.fakedestination.com:10255/api/v1/nodes')
123
+ .with(
124
+ headers: {
125
+ 'Host' => 'node.fakedestination.com:10255'
126
+ }
127
+ )
128
+
129
+ if timeout
130
+ get_nodes = get_nodes.to_timeout.times(2) # Nodes endpoint is called from two timers so must fail in both cases
131
+ end
132
+
133
+ get_nodes.to_return(status: 200,
134
+ body: File.open(File.expand_path('../nodes.json', __FILE__)))
157
135
  end
158
136
 
159
- def stub_k8s_requests
160
- stub_api_port_10250
137
+ def stub_k8s_init_requests
138
+ WebMock.reset!
139
+
140
+ stub_api_port_10255
141
+ end
142
+
143
+ def stub_k8s_requests(nodes_timeout: false, node_summary_timeout: false, pods_timeout: false)
144
+ WebMock.reset!
145
+
161
146
  stub_api_port_10255
162
147
  stub_api_v1
163
- stub_api_pods
164
- stub_api_nodes
148
+ stub_api_pods(pods_timeout)
149
+ stub_api_nodes(nodes_timeout)
165
150
  stub_api_node_1
151
+ stub_api_node_2(node_summary_timeout)
166
152
  stub_api_node_3
167
- stub_api_node_2
168
153
  end
169
154
 
170
155
  def get_parsed_file(file_name)
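
Note: the timeout variants above use WebMock response chaining: to_timeout makes the first matching request time out, and a response attached afterwards (via then / a later to_return) is served to subsequent requests, so a test can observe one failed interval followed by a successful one. A standalone sketch with a hypothetical URL (not taken from the test suite):

    require 'webmock'
    include WebMock::API
    WebMock.enable!

    # First GET to this endpoint times out; every later GET returns 200 —
    # the same shape the *_timeout helpers above rely on.
    stub_request(:get, 'https://example.test/api/v1/nodes')
      .to_timeout.then
      .to_return(status: 200, body: '{"items": []}')
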
@@ -5,10 +5,6 @@ class KubernetesMetricsAggInputTest < Test::Unit::TestCase
5
5
  include Fluent::Test::Helpers
6
6
  include PluginTestHelper
7
7
 
8
- @driver = nil
9
- @driver_test = nil
10
- @@hash_map_test = {}
11
-
12
8
  ZERO_CONFIG = %([
13
9
  ]).freeze
14
10
 
@@ -34,20 +30,22 @@ class KubernetesMetricsAggInputTest < Test::Unit::TestCase
34
30
  METRIC_TEST_CONFIG = %([
35
31
  kubernetes_url https://node.fakedestination.com
36
32
  kubelet_port 10255
33
+ interval 5s
34
+ tag kube.*
35
+ ]).freeze
36
+
37
+ TIMEOUT_TEST_CONFIG = %([
38
+ kubernetes_url https://node.fakedestination.com
39
+ kubelet_port 10255
40
+ interval 5s
37
41
  tag kube.*
38
42
  ]).freeze
39
43
 
40
44
  setup do
41
45
  Fluent::Test.setup
46
+
42
47
  ENV['KUBERNETES_SERVICE_HOST'] = "node.fakedestination.com"
43
48
  ENV['KUBERNETES_SERVICE_PORT'] = "10255"
44
- stub_k8s_requests
45
- @driver = create_driver(METRIC_TEST_CONFIG)
46
- @driver.run timeout: 20, expect_emits: 200, shutdown: false
47
-
48
- @driver.events.each do |tag, time, record|
49
- @@hash_map_test[tag] = tag, time, record
50
- end
51
49
  end
52
50
 
53
51
  def create_driver(conf = BASIC_CONFIG)
@@ -60,6 +58,8 @@ class KubernetesMetricsAggInputTest < Test::Unit::TestCase
60
58
 
61
59
  sub_test_case 'default parameter configuration' do
62
60
  test 'test default params' do
61
+ stub_k8s_init_requests
62
+
63
63
  d = create_driver(ZERO_CONFIG)
64
64
  assert_equal 10_250, d.instance.kubelet_port
65
65
  assert_equal 'kubernetes.metrics.*', d.instance.tag
@@ -78,141 +78,174 @@ class KubernetesMetricsAggInputTest < Test::Unit::TestCase
78
78
 
79
79
  sub_test_case 'modify parameter changes' do
80
80
  test 'test kubelet_port and supplied kubernetes URL parameters' do
81
+ stub_k8s_init_requests
82
+
81
83
  d = create_driver(ADVANCED_CONFIG_NO_CERTS)
82
84
  assert_equal 'https://node.fakedestination.com', d.instance.kubernetes_url
83
85
  assert_equal 10_255, d.instance.kubelet_port
84
86
  end
85
87
 
86
88
  test 'test tag and interval parameters' do
89
+ stub_k8s_init_requests
90
+
87
91
  d = create_driver(ADVANCED_CONFIG_NO_CERTS)
88
92
  assert_equal 'test.tag.check', d.instance.tag
89
93
  assert_equal 120, d.instance.interval
90
94
  end
91
95
 
92
96
  test 'test insecure_ssl and cluster_name parameters ' do
97
+ stub_k8s_init_requests
98
+
93
99
  d = create_driver(ADVANCED_CONFIG_NO_CERTS)
94
100
  assert_true d.instance.insecure_ssl
95
101
  assert_equal 'awesome_cluster', d.instance.cluster_name
96
102
  end
97
103
  end
98
104
 
99
- sub_test_case 'Test metrics exist, limits_request_scraper - limits' do
100
- test 'Testing kube.container.memory.limit' do
101
- assert_true @@hash_map_test.key?('kube.container.memory.limit')
102
- end
103
-
104
- test 'Testing kube.namespace.cpu.limit' do
105
- assert_true @@hash_map_test.key?('kube.namespace.cpu.limit')
106
- end
107
-
108
- test 'Testing kube.namespace.memory.limit ' do
109
- assert_true @@hash_map_test.key?('kube.namespace.memory.limit')
110
- end
111
-
112
- test 'Testing kube.container.cpu.limit' do
113
- assert_true @@hash_map_test.key?('kube.container.cpu.limit')
114
- end
115
-
116
- test 'Testing kube.pod.cpu.limit' do
117
- assert_true @@hash_map_test.key?('kube.pod.cpu.limit')
118
- end
119
-
120
- test 'Testing kube.cluster.memory.limit ' do
121
- assert_true @@hash_map_test.key?('kube.cluster.memory.limit')
122
- end
123
-
124
- test 'Testing kube.pod.memory.limit ' do
125
- assert_true @@hash_map_test.key?('kube.pod.memory.limit')
126
- end
127
-
128
- test 'Testing kube.cluster.cpu.limit' do
129
- assert_true @@hash_map_test.key?('kube.cluster.cpu.limit')
105
+ sub_test_case 'Test metrics exist' do
106
+ test 'Testing all expected metrics are emitted' do
107
+ stub_k8s_requests
108
+
109
+ hash_map_test = {}
110
+
111
+ d = create_driver(METRIC_TEST_CONFIG)
112
+ d.run timeout: 12, expect_emits: 200, shutdown: false
113
+
114
+ d.events.each do |tag, time, record|
115
+ hash_map_test[tag] = tag, time, record
116
+ end
117
+
118
+ # Test metrics exist, limits_request_scraper - limits
119
+ assert_true hash_map_test.key?('kube.namespace.cpu.limit')
120
+ assert_true hash_map_test.key?('kube.namespace.memory.limit')
121
+ assert_true hash_map_test.key?('kube.container.cpu.limit')
122
+ assert_true hash_map_test.key?('kube.pod.cpu.limit')
123
+ assert_true hash_map_test.key?('kube.cluster.memory.limit')
124
+ assert_true hash_map_test.key?('kube.pod.memory.limit')
125
+ assert_true hash_map_test.key?('kube.cluster.cpu.limit')
126
+
127
+ # Test metrics exist, limits_request_scraper - request
128
+ assert_true hash_map_test.key?('kube.cluster.memory.request')
129
+ assert_true hash_map_test.key?('kube.container.memory.request')
130
+ assert_true hash_map_test.key?('kube.pod.memory.request')
131
+ assert_true hash_map_test.key?('kube.namespace.memory.request')
132
+ assert_true hash_map_test.key?('kube.container.cpu.request')
133
+ assert_true hash_map_test.key?('kube.namespace.cpu.request')
134
+ assert_true hash_map_test.key?('kube.pod.cpu.request')
135
+ assert_true hash_map_test.key?('kube.cluster.cpu.request')
136
+
137
+ # Test metrics exist, node_scraper/resource_usage_scraper 1
138
+ assert_true hash_map_test.key?('kube.node.cpu.capacity')
139
+ assert_true hash_map_test.key?('kube.node.memory.capacity')
140
+ assert_true hash_map_test.key?('kube.node.memory.allocatable')
141
+ assert_true hash_map_test.key?('kube.node.cpu.utilization')
142
+ assert_true hash_map_test.key?('kube.node.memory.reservation')
143
+ assert_true hash_map_test.key?('kube.node.memory.utilization')
144
+
145
+ # Test metrics exist, node_scraper/resource_usage_scraper 2
146
+ assert_true hash_map_test.key?('kube.namespace.memory.usage')
147
+ assert_true hash_map_test.key?('kube.cluster.memory.usage')
148
+ assert_true hash_map_test.key?('kube.namespace.cpu.usage')
149
+ assert_true hash_map_test.key?('kube.node.cpu.allocatable')
150
+ assert_true hash_map_test.key?('kube.node.cpu.reservation')
151
+ assert_true hash_map_test.key?('kube.cluster.cpu.usage')
152
+
153
+ d.instance_shutdown
130
154
  end
131
155
  end
132
156
 
133
- sub_test_case 'Test metrics exist, limits_request_scraper - request' do
134
- test 'Testing kube.cluster.memory.request ' do
135
- assert_true @@hash_map_test.key?('kube.cluster.memory.request')
136
- end
157
+ sub_test_case 'Test handles request timeouts' do
137
158
 
138
- test 'Testing kube.container.memory.request' do
139
- assert_true @@hash_map_test.key?('kube.container.memory.request')
140
- end
159
+ test 'Testing event count with nodes call timeout' do
160
+ stub_k8s_requests(nodes_timeout: true)
141
161
 
142
- test 'Testing kube.pod.memory.request' do
143
- assert_true @@hash_map_test.key?('kube.pod.memory.request')
144
- end
162
+ namespace_event_count = 0
163
+ pod_event_count = 0
164
+ node_event_count = 0
145
165
 
146
- test 'Testing kube.namespace.memory.request ' do
147
- assert_true @@hash_map_test.key?('kube.namespace.memory.request')
148
- end
166
+ d = create_driver(TIMEOUT_TEST_CONFIG)
167
+ # Should run for two intervals, the first call to node 1 which has the only 'default' namespace pod should timeout the first time
168
+ d.run timeout: 12, expect_emits: 500, shutdown: false
149
169
 
150
- test 'Testing kube.container.cpu.request' do
151
- assert_true @@hash_map_test.key?('kube.container.cpu.request')
152
- end
170
+ d.events.each do |tag, _time, record|
171
+ # Limit to one events that should be emitted once per interval
172
+ if tag == 'kube.pod.cpu.limit' && record['name'] == 'new-metrics-test-final-splunk-kubernetes-metrics-fgszl'
173
+ pod_event_count += 1
174
+ end
175
+ if tag == 'kube.namespace.cpu.usage' && record['name'] == 'kube-system'
176
+ namespace_event_count += 1
177
+ end
178
+ if tag == 'kube.node.cpu.capacity' && record['node'] == 'generics-aws-node-one'
179
+ node_event_count += 1
180
+ end
181
+ end
153
182
 
154
- test 'Testing kube.namespace.cpu.request' do
155
- assert_true @@hash_map_test.key?('kube.namespace.cpu.request')
156
- end
183
+ # 2 intervals - first call times out but timer continues emitting successfully next interval
184
+ assert_equal 1, node_event_count, 'Number of node events emitted was wrong'
185
+ # 2 intervals - first call times out but timer continues emitting successfully next interval
186
+ assert_equal 1, namespace_event_count, 'Number of namespace events emitted was wrong'
187
+ # 2 intervals - not timeouts
188
+ assert_equal 2, pod_event_count, 'Number of pod events emitted was wrong'
157
189
 
158
- test 'Testing kube.pod.cpu.request' do
159
- assert_true @@hash_map_test.key?('kube.pod.cpu.request')
190
+ d.instance_shutdown
160
191
  end
161
192
 
162
- test 'Testing kube.cluster.cpu.request' do
163
- assert_true @@hash_map_test.key?('kube.cluster.cpu.request')
164
- end
165
- end
193
+ test 'Testing event count with pods call timeout' do
194
+ stub_k8s_requests(pods_timeout: true)
166
195
 
167
- sub_test_case 'Test metrics exist, node_scraper/resource_usage_scraper 1' do
168
- test 'Testing kube.node.cpu.capacity' do
169
- assert_true @@hash_map_test.key?('kube.node.cpu.capacity')
170
- end
196
+ pod_event_count = 0
197
+ node_event_count = 0
171
198
 
172
- test 'Testing kube.node.memory.capacity ' do
173
- assert_true @@hash_map_test.key?('kube.node.memory.capacity')
174
- end
199
+ d = create_driver(TIMEOUT_TEST_CONFIG)
200
+ # Should run for two intervals, the first call to node 1 which has the only 'default' namespace pod should timeout the first time
201
+ d.run timeout: 12, expect_emits: 500, shutdown: false
175
202
 
176
- test 'Testing kube.node.memory.allocatable' do
177
- assert_true @@hash_map_test.key?('kube.node.memory.allocatable')
178
- end
203
+ d.events.each do |tag, _time, record|
204
+ # Limit to one events that should be emitted once per interval
205
+ if tag == 'kube.pod.cpu.limit' && record['name'] == 'new-metrics-test-final-splunk-kubernetes-metrics-fgszl'
206
+ pod_event_count += 1
207
+ end
179
208
 
180
- test 'Testing kube.node.cpu.utilization ' do
181
- assert_true @@hash_map_test.key?('kube.node.cpu.utilization')
182
- end
209
+ if tag == 'kube.node.cpu.utilization' && record['node'] == 'generics-aws-node-one'
210
+ node_event_count += 1
211
+ end
212
+ end
183
213
 
184
- test 'Testing kube.node.memory.reservation' do
185
- assert_true @@hash_map_test.key?('kube.node.memory.reservation')
186
- end
214
+ # 2 intervals - first call times out but timer continues emitting successfully next interval
215
+ assert_equal 1, pod_event_count, 'Number of pod events emitted was wrong'
216
+ # 2 intervals - not timeouts
217
+ assert_equal 2, node_event_count, 'Number of namespace events emitted was wrong'
187
218
 
188
- test 'Testing kube.node.memory.utilization' do
189
- assert_true @@hash_map_test.key?('kube.node.memory.utilization')
219
+ d.instance_shutdown
190
220
  end
191
- end
192
221
 
193
- sub_test_case 'Test metrics exist, node_scraper/resource_usage_scraper 2' do
194
- test 'Testing kube.namespace.memory.usage ' do
195
- assert_true @@hash_map_test.key?('kube.namespace.memory.usage')
196
- end
222
+ test 'Testing event count with node summary call timeout' do
223
+ stub_k8s_requests(node_summary_timeout: true)
197
224
 
198
- test 'Testing kube.cluster.memory.usage' do
199
- assert_true @@hash_map_test.key?('kube.cluster.memory.usage')
200
- end
225
+ namespace_event_count = 0
226
+ pod_event_count = 0
201
227
 
202
- test 'Testing kube.namespace.cpu.usage' do
203
- assert_true @@hash_map_test.key?('kube.namespace.cpu.usage')
204
- end
228
+ d = create_driver(TIMEOUT_TEST_CONFIG)
229
+ # Should run for two intervals, the first call to node 1 which has the only 'default' namespace pod should timeout the first time
230
+ d.run timeout: 12, expect_emits: 500, shutdown: false
205
231
 
206
- test 'Testing kube.node.cpu.allocatable ' do
207
- assert_true @@hash_map_test.key?('kube.node.cpu.allocatable')
208
- end
232
+ d.events.each do |tag, _time, record|
233
+ # Limit to one events that should be emitted once per interval
234
+ if tag == 'kube.namespace.cpu.usage' && record['name'] == 'kube-system'
235
+ namespace_event_count += 1
236
+ end
237
+ if tag == 'kube.pod.cpu.limit' && record['name'] == 'new-metrics-test-final-splunk-kubernetes-metrics-fgszl'
238
+ pod_event_count += 1
239
+ end
240
+ end
209
241
 
210
- test 'Testing kube.node.cpu.reservation ' do
211
- assert_true @@hash_map_test.key?('kube.node.cpu.reservation')
212
- end
242
+ # 2 intervals - first call times out but timer continues emitting successfully next interval
243
+ assert_equal 1, namespace_event_count, 'Number of namespace events emitted was wrong'
244
+ # 2 intervals - not timeouts
245
+ assert_equal 2, pod_event_count, 'Number of pod events emitted was wrong'
213
246
 
214
- test 'Testing kube.cluster.cpu.usage' do
215
- assert_true @@hash_map_test.key?('kube.cluster.cpu.usage')
247
+ d.instance_shutdown
216
248
  end
249
+
217
250
  end
218
251
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-k8s-metrics-agg
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Splunk Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-03-07 00:00:00.000000000 Z
11
+ date: 2019-10-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -147,10 +147,18 @@ files:
147
147
  - ".circleci/build_and_push.sh"
148
148
  - ".circleci/build_and_push_to_dockerhub.sh"
149
149
  - ".circleci/build_and_push_to_github_release.sh"
150
+ - ".circleci/check_version_trigger_release.sh"
150
151
  - ".circleci/config.yml"
152
+ - ".circleci/gem_credentials"
151
153
  - ".circleci/install_dep.sh"
152
154
  - ".circleci/push_gem.sh"
155
+ - ".circleci/push_gem_to_rubygems.sh"
156
+ - ".github/ISSUE_TEMPLATE/bug_report.md"
157
+ - ".github/ISSUE_TEMPLATE/enhancement_request.md"
158
+ - ".github/ISSUE_TEMPLATE/failing_test.md"
159
+ - ".github/PULL_REQUEST_TEMPLATE.md"
153
160
  - ".gitignore"
161
+ - CLA.md
154
162
  - CONTRIBUTING.md
155
163
  - CONTRIBUTORS.md
156
164
  - Gemfile
@@ -161,6 +169,7 @@ files:
161
169
  - VERSION
162
170
  - docker/CONTRIBUTING.md
163
171
  - docker/Dockerfile
172
+ - docker/FLUENTD_HEC_GEM_VERSION
164
173
  - docker/LICENSE
165
174
  - docker/README.md
166
175
  - docker/entrypoint.sh
@@ -194,7 +203,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
194
203
  - !ruby/object:Gem::Version
195
204
  version: '0'
196
205
  requirements: []
197
- rubygems_version: 3.0.1
206
+ rubygems_version: 3.0.3
198
207
  signing_key:
199
208
  specification_version: 4
200
209
  summary: A fluentd input plugin that collects kubernetes cluster metrics.