@redpanda-data/docs-extensions-and-macros 4.4.1 → 4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,77 @@
1
+ input:
2
+ # Use the 'generate' input
3
+ # https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
4
+ generate:
5
+ # The interval at which new records are generated.
6
+ interval: 1s
7
+ # The mapping section defines how each generated record is structured.
8
+ # The language used here is called Bloblang.
9
+ # https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
10
+ mapping: |
11
+ # Generate a fake first name using the 'first_name' faker function.
12
+ let first_name = fake("first_name")
13
+
14
+ # Generate a fake last name using the 'last_name' faker function.
15
+ let last_name = fake("last_name")
16
+
17
+ # Define possible subscription levels for users.
18
+ let subscription_levels = ["Free", "Basic", "Premium"]
19
+
20
+ # Define possible notification channels for user preferences.
21
+ let notifications = ["email", "sms", "push" ]
22
+
23
+ # Define supported languages for user preferences.
24
+ let languages = ["en", "es", "fr", "de", "zh", "jp"]
25
+
26
+ # Assign a unique user ID using a UUID digit generator.
27
+ root.user_id = fake("uuid_digit")
28
+
29
+ # Assign the generated first name to the 'first_name' field.
30
+ root.first_name = $first_name
31
+
32
+ # Assign the generated last name to the 'last_name' field.
33
+ root.last_name = $last_name
34
+
35
+ # Construct the user's email by combining the first initial, last name, and a fake domain name.
36
+ # The email is converted to lowercase for consistency.
37
+ root.email = ($first_name.slice(0,1) + $last_name + "@" + fake("domain_name")).lowercase()
38
+
39
+ # Assign a fake registration date using the 'date' faker function.
40
+ root.registration_date = fake("date")
41
+
42
+ # Assign the current timestamp as the last login time.
43
+ root.last_login = now()
44
+
45
+ # Randomly assign a subscription level by selecting an index from the 'subscription_levels' array.
46
+ root.subscription_level = $subscription_levels.index(random_int(min: 0, max: 2))
47
+
48
+ # Randomly assign a language preference by selecting an index from the 'languages' array.
49
+ root.preferences.language = $languages.index(random_int(min: 0, max: 5))
50
+
51
+ # Randomly assign a notification preference by selecting an index from the 'notifications' array.
52
+ root.preferences.notifications = $notifications.index(random_int(min: 0, max: 2))
53
+ pipeline:
54
+ processors:
55
+ - mapping: |
56
+ # Set the target topic for the generated records to 'profiles'.
57
+ meta topic = "profiles"
58
+
59
+ # Pass the entire record through unchanged (root = this) so that it is sent to the topic set in the metadata above.
60
+ root = this
61
+ output:
62
+ # Use the 'kafka_franz' output to send the result back to Redpanda
63
+ # https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
64
+ kafka_franz:
65
+ # Define the list of seed brokers for the Kafka cluster.
66
+ seed_brokers: [ "localhost:19092", "localhost:29092", "localhost:39092"]
67
+ # Dynamically assign the topic based on the metadata specified in the processors.
68
+ # In this case, it resolves to the 'profiles' topic.
69
+ topic: ${! metadata("topic") }
70
+ # Configure SASL authentication to securely connect to the Kafka brokers.
71
+ sasl:
72
+ - # Specify the SASL mechanism to use for authentication.
73
+ mechanism: SCRAM-SHA-256
74
+ # The password for the SASL authentication.
75
+ password: secretpassword
76
+ # The username for the SASL authentication.
77
+ username: superuser
@@ -0,0 +1,24 @@
1
+ # This file configures `rpk` to connect to a remote Redpanda cluster running in the same local network as `rpk`.
2
+
3
+ # Configuration for connecting to the Kafka API of the Redpanda cluster.
4
+ kafka_api:
5
+ # SASL (Simple Authentication and Security Layer) settings for authentication.
6
+ sasl:
7
+ user: superuser # The username used for authentication
8
+ password: secretpassword # The password associated with the username
9
+ mechanism: scram-sha-256 # Authentication mechanism; SCRAM-SHA-256 provides secure password-based authentication
10
+ # List of Kafka brokers in the Redpanda cluster.
11
+ # These brokers ensure high availability and fault tolerance for Kafka-based communication.
12
+ brokers:
13
+ - 127.0.0.1:19092 # Broker 1: Accessible on localhost, port 19092
14
+ - 127.0.0.1:29092 # Broker 2: Accessible on localhost, port 29092
15
+ - 127.0.0.1:39092 # Broker 3: Accessible on localhost, port 39092
16
+
17
+ # Configuration for connecting to the Redpanda Admin API.
18
+ # The Admin API allows you to perform administrative tasks such as managing configurations, monitoring, and scaling.
19
+ admin_api:
20
+ # List of Admin API endpoints for managing the cluster.
21
+ addresses:
22
+ - 127.0.0.1:19644 # Admin API for Broker 1: Accessible on localhost, port 19644
23
+ - 127.0.0.1:29644 # Admin API for Broker 2: Accessible on localhost, port 29644
24
+ - 127.0.0.1:39644 # Admin API for Broker 3: Accessible on localhost, port 39644
@@ -0,0 +1,37 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "title": "Transactions",
4
+ "type": "object",
5
+ "properties": {
6
+ "email": {
7
+ "type": "string",
8
+ "format": "email",
9
+ "description": "The email address of the user involved in the transaction."
10
+ },
11
+ "index": {
12
+ "type": "integer",
13
+ "description": "A numeric index associated with the transaction."
14
+ },
15
+ "price": {
16
+ "type": "string",
17
+ "pattern": "^[A-Z]{3} \\d+(?:\\.\\d{2})?$",
18
+ "description": "A string representing the price of the product, including a currency code (ISO 4217) and an amount with two decimal places by default."
19
+ },
20
+ "product_url": {
21
+ "type": "string",
22
+ "format": "uri",
23
+ "description": "A URL that points to the product involved in the transaction."
24
+ },
25
+ "timestamp": {
26
+ "type": "string",
27
+ "format": "date-time",
28
+ "description": "The timestamp of when the transaction occurred, formatted in ISO 8601."
29
+ },
30
+ "user_id": {
31
+ "type": "integer",
32
+ "description": "A numeric identifier for the user."
33
+ }
34
+ },
35
+ "required": ["email", "index", "price", "product_url", "timestamp", "user_id"],
36
+ "additionalProperties": false
37
+ }
@@ -0,0 +1,46 @@
1
+ # Transactions Topic Documentation
2
+
3
+ This document provides an overview of the `transactions` topic in the Redpanda cluster. The topic is designed to capture autogenerated transaction events with various attributes.
4
+
5
+ ## Schema Overview
6
+
7
+ Each message in the `transactions` topic adheres to the following JSON schema:
8
+
9
+ ```json
10
+ {
11
+ "email": "string",
12
+ "index": "integer",
13
+ "price": "string",
14
+ "product_url": "string",
15
+ "timestamp": "string",
16
+ "user_id": "integer"
17
+ }
18
+ ```
19
+
20
+ - **email**: The email address of the user involved in the transaction.
21
+ - **index**: A numeric index associated with the transaction. This could represent the position or order of the transaction in a sequence.
22
+ - **price**: A string representing the price of the product. It includes a currency code (e.g., "XXX") followed by the amount.
23
+ - **product_url**: A URL that points to the product involved in the transaction.
24
+ - **timestamp**: The timestamp of when the transaction occurred, formatted in ISO 8601.
25
+ - **user_id**: A numeric identifier for the user. This is typically a unique ID assigned to each user in the system.
26
+
27
+ ## Example message
28
+
29
+ ```json
30
+ {
31
+ "email": "wzieme@ykczius.edu",
32
+ "index": 0,
33
+ "price": "XXX 5651308.100000",
34
+ "product_url": "http://yjomdta.top/DxvGsCn.php",
35
+ "timestamp": "2024-08-16T15:51:19.799474084Z",
36
+ "user_id": 1
37
+ }
38
+ ```
39
+
40
+ ## Use cases
41
+
42
+ You can use the `transactions` topic for various purposes, including:
43
+
44
+ - **Analytics**: Tracking and analyzing user transactions to understand buying behavior, popular products, etc.
45
+ - **Monitoring**: Observing transaction patterns to detect anomalies, such as unusual spikes or drops in transaction volume.
46
+ - **Data Processing**: Feeding transaction data into other systems, such as a data warehouse or real-time processing pipelines, for further processing and analysis.
@@ -0,0 +1,73 @@
1
+ = Modify the Wasm Transform in the Quickstart
2
+
3
+ This directory contains the Go source code (`transform.go`) for the data transform that is used in the Redpanda Self-Managed quickstart.
4
+ If you're following the quickstart, you *do not* need to modify or rebuild this code. The Docker Compose configuration automatically deploys a pre-built transform called `regex.wasm`.
5
+
6
+ However, if you want to customize the data transform logic, continue reading.
7
+
8
+ == Why customize the transform?
9
+
10
+ - **Custom filtering**: Filter by a different regex or apply multiple conditions.
11
+ - **Data manipulation**: Transform records before writing them out. For example, redacting sensitive data or combining fields.
12
+ - **Extended functionality**: Add advanced logging, error handling, or multi-topic routing.
13
+
14
+ == Prerequisites
15
+
16
+ You need the following:
17
+
18
+ - Go 1.20 or later installed. To check your installed version, run:
19
+ +
20
+ [source,bash]
21
+ ----
22
+ go version
23
+ ----
24
+
25
+ - The Redpanda CLI (`rpk`) installed.
26
+
27
+ - A running Redpanda cluster. If you're using the local quickstart with Docker Compose, ensure the cluster is up and running. Or, point `rpk` to another Redpanda environment.
28
+
29
+ == Modify and deploy your transform
30
+
31
+ . Open link:transform.go[transform.go] and make your changes. For example:
32
+ +
33
+ --
34
+ - Change the regex logic to handle different use cases.
35
+ - Add environment variables to control new features.
36
+ - Extend the `doRegexFilter()` function to manipulate records.
37
+ --
38
+
39
+ . Compile your Go code into a `.wasm` file:
40
+ +
41
+ [source,bash]
42
+ ----
43
+ rpk transform build
44
+ ----
45
+ +
46
+ This command compiles your Go source and produces a `.wasm` file that you can deploy to Redpanda.
47
+
48
+ . Deploy the new transform.
49
+ +
50
+ If your Docker Compose setup already has a service to deploy the transform, you can restart that service.
51
+ +
52
+ Otherwise, you can deploy your updated `.wasm` manually using `rpk transform deploy`.
53
+
54
+ . Produce messages into the input topic. For example:
55
+ +
56
+ [source,bash]
57
+ ----
58
+ echo '{"key":"alice@university.edu","value":"test message"}' | rpk topic produce logins
59
+ ----
60
+
61
+ . Consume from the output topic. For example:
62
+ +
63
+ [source,bash]
64
+ ----
65
+ rpk topic consume edu-filtered-domains --num 1
66
+ ----
67
+
68
+ == Suggested reading
69
+
70
+ - link:https://docs.redpanda.com/current/reference/rpk/[Redpanda `rpk` CLI Reference^].
71
+ - link:https://docs.redpanda.com/current/develop/data-transforms/build/[Develop Data Transforms^].
72
+ - https://golang.org/ref/mod[Go Modules^] for managing dependencies and builds in Go.
73
+ - https://docs.docker.com/compose/[Docker Compose^] for customizing your environment.
@@ -0,0 +1,5 @@
1
+ module regex
2
+
3
+ go 1.20
4
+
5
+ require github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0
@@ -0,0 +1,2 @@
1
+ github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 h1:KxgHJZsHsrT3YX7DMpu/vJN4TZN3KFm1jzrCFLyOepA=
2
+ github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0/go.mod h1:QGgiwwf/BIsD1b7EiyQ/Apzw+RLSpasRDdpOCiefQFQ=
@@ -0,0 +1,122 @@
1
+ package main
2
+ // This data transform filters records based on a customizable regex pattern.
3
+ // If a record's key or value
4
+ // (determined by an environment variable) matches the specified regex,
5
+ // the record is forwarded to the output.
6
+ // Otherwise, it is dropped.
7
+ //
8
+ // Usage:
9
+ // 1. Provide the following environment variables in your Docker or configuration setup:
10
+ // - PATTERN : (required) a regular expression that determines what you want to match.
11
+ // - MATCH_VALUE : (optional) a boolean to decide whether to check the record value. If false,
12
+ // the record key is checked. Default is false.
13
+ //
14
+ // Example environment variables:
15
+ // PATTERN=".*\\.edu$"
16
+ // MATCH_VALUE="true"
17
+ //
18
+ // Logs:
19
+ // This transform logs information about each record and whether it matched.
20
+ // The logs appear in the _redpanda.transform_logs topic, so you can debug how your records are being processed.
21
+ //
22
+ // Build instructions:
23
+ // go mod tidy
24
+ // rpk transform build
25
+ //
26
+ // For more details on building transforms with the Redpanda SDK, see:
27
+ // https://docs.redpanda.com/current/develop/data-transforms
28
+ //
29
+
30
+ import (
31
+ "log"
32
+ "os"
33
+ "regexp"
34
+ "strings"
35
+
36
+ "github.com/redpanda-data/redpanda/src/transform-sdk/go/transform"
37
+ )
38
+
39
+ var (
40
+ re *regexp.Regexp
41
+ checkValue bool
42
+ )
43
+
44
+ func isTrueVar(v string) bool {
45
+ switch strings.ToLower(v) {
46
+ case "yes", "ok", "1", "true":
47
+ return true
48
+ default:
49
+ return false
50
+ }
51
+ }
52
+
53
+ // The main() function runs only once at startup. It performs all initialization steps:
54
+ // - Reads and compiles the regex pattern.
55
+ // - Determines whether to match on the key or value.
56
+ // - Registers the doRegexFilter() function to process records.
57
+ func main() {
58
+ // Set logging preferences, including timestamp and UTC time.
59
+ log.SetPrefix("[regex-transform] ")
60
+ log.SetFlags(log.Ldate | log.Ltime | log.LUTC | log.Lmicroseconds)
61
+
62
+ // Start logging the transformation process
63
+ log.Println("Starting transform...")
64
+
65
+ // Read the PATTERN environment variable to get the regex pattern.
66
+ pattern, ok := os.LookupEnv("PATTERN")
67
+ if !ok {
68
+ log.Fatal("Missing PATTERN environment variable")
69
+ }
70
+ // Log the regex pattern being used.
71
+ log.Printf("Using PATTERN: %q\n", pattern)
72
+ // Compile the regex pattern for later use.
73
+ re = regexp.MustCompile(pattern)
74
+
75
+ // Read the MATCH_VALUE environment variable to determine whether to check the record's value.
76
+ mk, ok := os.LookupEnv("MATCH_VALUE")
77
+ checkValue = ok && isTrueVar(mk)
78
+ log.Printf("MATCH_VALUE set to: %t\n", checkValue)
79
+
80
+ log.Println("Initialization complete, waiting for records...")
81
+
82
+ // Listen for records to be written, calling doRegexFilter() for each record.
83
+ transform.OnRecordWritten(doRegexFilter)
84
+ }
85
+
86
+ // The doRegexFilter() function executes each time a new record is written.
87
+ // It checks whether the record's key or value (based on MATCH_VALUE) matches the compiled regex.
88
+ // If it matches, the record is forwarded; if not, it's dropped.
89
+ func doRegexFilter(e transform.WriteEvent, w transform.RecordWriter) error {
90
+ // This stores the data to be checked (either the key or value).
91
+ var dataToCheck []byte
92
+
93
+ // Depending on the MATCH_VALUE environment variable, decide whether to check the record's key or value.
94
+ if checkValue {
95
+ // Use the value of the record if MATCH_VALUE is true.
96
+ dataToCheck = e.Record().Value
97
+ log.Printf("Checking record value: %s\n", string(dataToCheck))
98
+ } else {
99
+ // Use the key of the record if MATCH_VALUE is false.
100
+ dataToCheck = e.Record().Key
101
+ log.Printf("Checking record key: %s\n", string(dataToCheck))
102
+ }
103
+
104
+ // If there is no key or value to check, log and skip the record.
105
+ if dataToCheck == nil {
106
+ log.Println("Record has no key/value to check, skipping.")
107
+ return nil
108
+ }
109
+
110
+ // Check if the data matches the regex pattern.
111
+ pass := re.Match(dataToCheck)
112
+ if pass {
113
+ // If the record matches the pattern, log and write the record to the output topic.
114
+ log.Printf("Record matched pattern, passing through. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value))
115
+ return w.Write(e.Record())
116
+ } else {
117
+ // If the record does not match the pattern, log and drop the record.
118
+ log.Printf("Record did not match pattern, dropping. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value))
119
+ // Do not write the record if it doesn't match the pattern.
120
+ return nil
121
+ }
122
+ }
@@ -0,0 +1,33 @@
1
+ # Transform metadata used by the rpk transform build command.
2
+ # This metadata file tells rpk:
3
+ # 1) The transform’s display name, which also becomes the base for the .wasm file name.
4
+ # 2) A brief description of what it does.
5
+ # 3) Defaults for environment variables.
6
+ # 4) Input and output topics (if you want to define them here rather than in the deploy command).
7
+
8
+ # Human-readable name of the transform. rpk transform build uses this for the generated .wasm file.
9
+ name: regex
10
+
11
+ description: |
12
+ Filters the input topic to records that only match a regular expression.
13
+
14
+ Regular expressions are implemented using Go's regexp library, which uses the syntax of RE2.
15
+ See the RE2 wiki for allowed syntax: https://github.com/google/re2/wiki/Syntax
16
+
17
+ Environment variables:
18
+ - PATTERN: The regular expression that will match against records (required).
19
+ - MATCH_VALUE: By default, the regex matches keys, but if set to "true", the regex matches values.
20
+
21
+ # By default, no input topic is set here. (You can set it in your deploy command if preferred.)
22
+ input-topic: ""
23
+
24
+ # By default, no output topic is set here. (You can set it in your deploy command if preferred.)
25
+ output-topic: ""
26
+
27
+ # Indicates the specific TinyGo environment used to compile your transform.
28
+ language: tinygo-no-goroutines
29
+
30
+ env:
31
+ # The PATTERN variable must be provided at deploy time.
32
+ # Example: --var=PATTERN=".*@example.com"
33
+ PATTERN: '<required>'
@@ -44,18 +44,6 @@ cloud_storage_segment_max_upload_interval_sec: 60
44
44
  # Continuous Data Balancing (enterprise feature) continuously monitors your node and rack availability and disk usage. This enables self-healing clusters that dynamically balance partitions, ensuring smooth operations and optimal cluster performance.
45
45
  # https://docs.redpanda.com/current/manage/cluster-maintenance/continuous-data-balancing/
46
46
  partition_autobalancing_mode: continuous
47
- # Enable Redpanda to collect consumer group metrics.
48
- # https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_consumer_group_metrics
49
- enable_consumer_group_metrics:
50
- - "group"
51
- - "partition"
52
- - "consumer_lag"
53
- # Lower the interval for the autogeneration of consumer group metrics.
54
- # https://docs.redpanda.com/current/reference/properties/cluster-properties/#consumer_group_lag_collection_interval_sec
55
- consumer_group_lag_collection_interval_sec: 60
56
- # Enable Redpanda to collect host metrics.
57
- # https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_host_metrics
58
- enable_host_metrics: true
59
47
  # Enable for Iceberg metrics
60
48
  iceberg_enabled: true
61
49
  # Set up Iceberg REST catalog configuration
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@redpanda-data/docs-extensions-and-macros",
3
- "version": "4.4.1",
3
+ "version": "4.5.0",
4
4
  "description": "Antora extensions and macros developed for Redpanda documentation.",
5
5
  "keywords": [
6
6
  "antora",
@@ -12,8 +12,8 @@ async function loadOctokit() {
12
12
  : new Octokit();
13
13
 
14
14
  if (!process.env.VBOT_GITHUB_API_TOKEN) {
15
- console.warn(
16
- 'Warning: No GitHub token found (VBOT_GITHUB_API_TOKEN). API rate limits will be restricted.'
15
+ console.info(
16
+ 'No GitHub token found (VBOT_GITHUB_API_TOKEN).'
17
17
  );
18
18
  }
19
19
  }