outhad-integrations 0.32.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +43 -0
- data/.ruby-version +1 -0
- data/.vscode/settings.json +5 -0
- data/README.md +76 -0
- data/Rakefile +12 -0
- data/lib/outhad/integrations/config.rb +14 -0
- data/lib/outhad/integrations/core/base_connector.rb +79 -0
- data/lib/outhad/integrations/core/constants.rb +103 -0
- data/lib/outhad/integrations/core/destination_connector.rb +20 -0
- data/lib/outhad/integrations/core/fullrefresher.rb +19 -0
- data/lib/outhad/integrations/core/http_client.rb +17 -0
- data/lib/outhad/integrations/core/http_helper.rb +36 -0
- data/lib/outhad/integrations/core/query_builder.rb +33 -0
- data/lib/outhad/integrations/core/rate_limiter.rb +19 -0
- data/lib/outhad/integrations/core/source_connector.rb +66 -0
- data/lib/outhad/integrations/core/streaming_http_client.rb +21 -0
- data/lib/outhad/integrations/core/unstructured_source_connector.rb +52 -0
- data/lib/outhad/integrations/core/utils.rb +123 -0
- data/lib/outhad/integrations/core/vector_source_connector.rb +14 -0
- data/lib/outhad/integrations/destination/airtable/client.rb +157 -0
- data/lib/outhad/integrations/destination/airtable/config/catalog.json +6 -0
- data/lib/outhad/integrations/destination/airtable/config/meta.json +15 -0
- data/lib/outhad/integrations/destination/airtable/config/spec.json +23 -0
- data/lib/outhad/integrations/destination/airtable/icon.svg +6 -0
- data/lib/outhad/integrations/destination/airtable/schema_helper.rb +141 -0
- data/lib/outhad/integrations/destination/ais_data_store/client.rb +130 -0
- data/lib/outhad/integrations/destination/ais_data_store/config/meta.json +15 -0
- data/lib/outhad/integrations/destination/ais_data_store/config/spec.json +68 -0
- data/lib/outhad/integrations/destination/ais_data_store/icon.svg +4 -0
- data/lib/outhad/integrations/destination/amazon_s3/client.rb +92 -0
- data/lib/outhad/integrations/destination/amazon_s3/config/catalog.json +16 -0
- data/lib/outhad/integrations/destination/amazon_s3/config/meta.json +15 -0
- data/lib/outhad/integrations/destination/amazon_s3/config/spec.json +56 -0
- data/lib/outhad/integrations/destination/amazon_s3/icon.svg +34 -0
- data/lib/outhad/integrations/destination/databricks_lakehouse/client.rb +147 -0
- data/lib/outhad/integrations/destination/databricks_lakehouse/config/meta.json +15 -0
- data/lib/outhad/integrations/destination/databricks_lakehouse/config/spec.json +44 -0
- data/lib/outhad/integrations/destination/databricks_lakehouse/icon.svg +65 -0
- data/lib/outhad/integrations/destination/facebook_custom_audience/client.rb +125 -0
- data/lib/outhad/integrations/destination/facebook_custom_audience/config/catalog.json +42 -0
- data/lib/outhad/integrations/destination/facebook_custom_audience/config/meta.json +15 -0
- data/lib/outhad/integrations/destination/facebook_custom_audience/config/spec.json +28 -0
- data/lib/outhad/integrations/destination/facebook_custom_audience/icon.svg +23 -0
- data/lib/outhad/integrations/destination/google_sheets/client.rb +240 -0
- data/lib/outhad/integrations/destination/google_sheets/config/catalog.json +6 -0
- data/lib/outhad/integrations/destination/google_sheets/config/meta.json +15 -0
- data/lib/outhad/integrations/destination/google_sheets/config/spec.json +75 -0
- data/lib/outhad/integrations/destination/google_sheets/icon.svg +1 -0
- data/lib/outhad/integrations/destination/http/client.rb +106 -0
- data/lib/outhad/integrations/destination/http/config/catalog.json +16 -0
- data/lib/outhad/integrations/destination/http/config/meta.json +15 -0
- data/lib/outhad/integrations/destination/http/config/spec.json +24 -0
- data/lib/outhad/integrations/destination/http/icon.svg +9 -0
- data/lib/outhad/integrations/destination/hubspot/client.rb +122 -0
- data/lib/outhad/integrations/destination/hubspot/config/catalog.json +351 -0
- data/lib/outhad/integrations/destination/hubspot/config/meta.json +15 -0
- data/lib/outhad/integrations/destination/hubspot/config/spec.json +18 -0
- data/lib/outhad/integrations/destination/hubspot/icon.svg +5 -0
- data/lib/outhad/integrations/destination/iterable/client.rb +111 -0
- data/lib/outhad/integrations/destination/iterable/config/catalog.json +47 -0
- data/lib/outhad/integrations/destination/iterable/config/meta.json +15 -0
- data/lib/outhad/integrations/destination/iterable/config/spec.json +19 -0
- data/lib/outhad/integrations/destination/iterable/icon.svg +71 -0
- data/lib/outhad/integrations/destination/klaviyo/client.rb +119 -0
- data/lib/outhad/integrations/destination/klaviyo/config/catalog.json +103 -0
- data/lib/outhad/integrations/destination/klaviyo/config/meta.json +15 -0
- data/lib/outhad/integrations/destination/klaviyo/config/spec.json +24 -0
- data/lib/outhad/integrations/destination/klaviyo/icon.svg +6 -0
- data/lib/outhad/integrations/destination/mailchimp/client.rb +141 -0
- data/lib/outhad/integrations/destination/mailchimp/config/catalog.json +142 -0
- data/lib/outhad/integrations/destination/mailchimp/config/meta.json +15 -0
- data/lib/outhad/integrations/destination/mailchimp/config/spec.json +28 -0
- data/lib/outhad/integrations/destination/mailchimp/icon.svg +4 -0
- data/lib/outhad/integrations/destination/maria_db/client.rb +114 -0
- data/lib/outhad/integrations/destination/maria_db/config/meta.json +15 -0
- data/lib/outhad/integrations/destination/maria_db/config/spec.json +48 -0
- data/lib/outhad/integrations/destination/maria_db/icon.svg +15 -0
- data/lib/outhad/integrations/destination/microsoft_dynamics/client.rb +150 -0
- data/lib/outhad/integrations/destination/microsoft_dynamics/config/catalog.json +161 -0
- data/lib/outhad/integrations/destination/microsoft_dynamics/config/meta.json +15 -0
- data/lib/outhad/integrations/destination/microsoft_dynamics/config/spec.json +35 -0
- data/lib/outhad/integrations/destination/microsoft_dynamics/icon.svg +2 -0
- data/lib/outhad/integrations/destination/microsoft_excel/client.rb +198 -0
- data/lib/outhad/integrations/destination/microsoft_excel/config/catalog.json +7 -0
- data/lib/outhad/integrations/destination/microsoft_excel/config/meta.json +15 -0
- data/lib/outhad/integrations/destination/microsoft_excel/config/spec.json +19 -0
- data/lib/outhad/integrations/destination/microsoft_excel/icon.svg +18 -0
- data/lib/outhad/integrations/destination/microsoft_sql/client.rb +137 -0
- data/lib/outhad/integrations/destination/microsoft_sql/config/meta.json +15 -0
- data/lib/outhad/integrations/destination/microsoft_sql/config/spec.json +68 -0
- data/lib/outhad/integrations/destination/microsoft_sql/icon.svg +22 -0
- data/lib/outhad/integrations/destination/odoo/client.rb +109 -0
- data/lib/outhad/integrations/destination/odoo/config/meta.json +15 -0
- data/lib/outhad/integrations/destination/odoo/config/spec.json +39 -0
- data/lib/outhad/integrations/destination/odoo/icon.svg +21 -0
- data/lib/outhad/integrations/destination/oracle_db/client.rb +112 -0
- data/lib/outhad/integrations/destination/oracle_db/config/meta.json +15 -0
- data/lib/outhad/integrations/destination/oracle_db/config/spec.json +47 -0
- data/lib/outhad/integrations/destination/oracle_db/icon.svg +4 -0
- data/lib/outhad/integrations/destination/pinecone_db/client.rb +154 -0
- data/lib/outhad/integrations/destination/pinecone_db/config/meta.json +15 -0
- data/lib/outhad/integrations/destination/pinecone_db/config/spec.json +32 -0
- data/lib/outhad/integrations/destination/pinecone_db/icon.svg +1 -0
- data/lib/outhad/integrations/destination/postgresql/client.rb +130 -0
- data/lib/outhad/integrations/destination/postgresql/config/meta.json +15 -0
- data/lib/outhad/integrations/destination/postgresql/config/spec.json +68 -0
- data/lib/outhad/integrations/destination/postgresql/icon.svg +20 -0
- data/lib/outhad/integrations/destination/qdrant/client.rb +184 -0
- data/lib/outhad/integrations/destination/qdrant/config/meta.json +15 -0
- data/lib/outhad/integrations/destination/qdrant/config/spec.json +23 -0
- data/lib/outhad/integrations/destination/qdrant/icon.svg +1 -0
- data/lib/outhad/integrations/destination/salesforce_consumer_goods_cloud/client.rb +136 -0
- data/lib/outhad/integrations/destination/salesforce_consumer_goods_cloud/config/catalog.json +6 -0
- data/lib/outhad/integrations/destination/salesforce_consumer_goods_cloud/config/meta.json +16 -0
- data/lib/outhad/integrations/destination/salesforce_consumer_goods_cloud/config/spec.json +52 -0
- data/lib/outhad/integrations/destination/salesforce_consumer_goods_cloud/icon.svg +16 -0
- data/lib/outhad/integrations/destination/salesforce_consumer_goods_cloud/schema_helper.rb +132 -0
- data/lib/outhad/integrations/destination/salesforce_crm/client.rb +114 -0
- data/lib/outhad/integrations/destination/salesforce_crm/config/catalog.json +320 -0
- data/lib/outhad/integrations/destination/salesforce_crm/config/meta.json +15 -0
- data/lib/outhad/integrations/destination/salesforce_crm/config/spec.json +46 -0
- data/lib/outhad/integrations/destination/salesforce_crm/icon.svg +16 -0
- data/lib/outhad/integrations/destination/sftp/client.rb +186 -0
- data/lib/outhad/integrations/destination/sftp/config/catalog.json +16 -0
- data/lib/outhad/integrations/destination/sftp/config/meta.json +16 -0
- data/lib/outhad/integrations/destination/sftp/config/spec.json +73 -0
- data/lib/outhad/integrations/destination/sftp/icon.svg +1 -0
- data/lib/outhad/integrations/destination/slack/client.rb +125 -0
- data/lib/outhad/integrations/destination/slack/config/catalog.json +22 -0
- data/lib/outhad/integrations/destination/slack/config/meta.json +15 -0
- data/lib/outhad/integrations/destination/slack/config/spec.json +23 -0
- data/lib/outhad/integrations/destination/slack/icon.svg +26 -0
- data/lib/outhad/integrations/destination/stripe/client.rb +94 -0
- data/lib/outhad/integrations/destination/stripe/config/catalog.json +128 -0
- data/lib/outhad/integrations/destination/stripe/config/meta.json +15 -0
- data/lib/outhad/integrations/destination/stripe/config/spec.json +18 -0
- data/lib/outhad/integrations/destination/stripe/icon.svg +10 -0
- data/lib/outhad/integrations/destination/zendesk/client.rb +132 -0
- data/lib/outhad/integrations/destination/zendesk/config/catalog.json +110 -0
- data/lib/outhad/integrations/destination/zendesk/config/meta.json +18 -0
- data/lib/outhad/integrations/destination/zendesk/config/spec.json +32 -0
- data/lib/outhad/integrations/destination/zendesk/icon.svg +63 -0
- data/lib/outhad/integrations/protocol/protocol.json +189 -0
- data/lib/outhad/integrations/protocol/protocol.rb +228 -0
- data/lib/outhad/integrations/rollout.rb +66 -0
- data/lib/outhad/integrations/service.rb +55 -0
- data/lib/outhad/integrations/source/amazon_s3/client.rb +235 -0
- data/lib/outhad/integrations/source/amazon_s3/config/meta.json +16 -0
- data/lib/outhad/integrations/source/amazon_s3/config/spec.json +119 -0
- data/lib/outhad/integrations/source/amazon_s3/icon.svg +34 -0
- data/lib/outhad/integrations/source/anthropic/client.rb +135 -0
- data/lib/outhad/integrations/source/anthropic/config/catalog.json +6 -0
- data/lib/outhad/integrations/source/anthropic/config/meta.json +16 -0
- data/lib/outhad/integrations/source/anthropic/config/spec.json +56 -0
- data/lib/outhad/integrations/source/anthropic/icon.svg +1 -0
- data/lib/outhad/integrations/source/aws_athena/client.rb +109 -0
- data/lib/outhad/integrations/source/aws_athena/config/meta.json +16 -0
- data/lib/outhad/integrations/source/aws_athena/config/spec.json +63 -0
- data/lib/outhad/integrations/source/aws_athena/icon.svg +22 -0
- data/lib/outhad/integrations/source/aws_bedrock_model/client.rb +91 -0
- data/lib/outhad/integrations/source/aws_bedrock_model/config/catalog.json +6 -0
- data/lib/outhad/integrations/source/aws_bedrock_model/config/meta.json +16 -0
- data/lib/outhad/integrations/source/aws_bedrock_model/config/spec.json +58 -0
- data/lib/outhad/integrations/source/aws_bedrock_model/icon.svg +1 -0
- data/lib/outhad/integrations/source/aws_sagemaker_model/client.rb +79 -0
- data/lib/outhad/integrations/source/aws_sagemaker_model/config/catalog.json +6 -0
- data/lib/outhad/integrations/source/aws_sagemaker_model/config/meta.json +16 -0
- data/lib/outhad/integrations/source/aws_sagemaker_model/config/spec.json +52 -0
- data/lib/outhad/integrations/source/aws_sagemaker_model/icon.svg +7 -0
- data/lib/outhad/integrations/source/bigquery/client.rb +98 -0
- data/lib/outhad/integrations/source/bigquery/config/meta.json +16 -0
- data/lib/outhad/integrations/source/bigquery/config/spec.json +83 -0
- data/lib/outhad/integrations/source/bigquery/icon.svg +1 -0
- data/lib/outhad/integrations/source/clickhouse/client.rb +102 -0
- data/lib/outhad/integrations/source/clickhouse/config/meta.json +16 -0
- data/lib/outhad/integrations/source/clickhouse/config/spec.json +42 -0
- data/lib/outhad/integrations/source/clickhouse/icon.svg +25 -0
- data/lib/outhad/integrations/source/databricks/client.rb +98 -0
- data/lib/outhad/integrations/source/databricks/config/meta.json +17 -0
- data/lib/outhad/integrations/source/databricks/config/spec.json +56 -0
- data/lib/outhad/integrations/source/databricks/icon.svg +19 -0
- data/lib/outhad/integrations/source/databrics_model/client.rb +89 -0
- data/lib/outhad/integrations/source/databrics_model/config/catalog.json +6 -0
- data/lib/outhad/integrations/source/databrics_model/config/meta.json +17 -0
- data/lib/outhad/integrations/source/databrics_model/config/spec.json +63 -0
- data/lib/outhad/integrations/source/databrics_model/icon.svg +19 -0
- data/lib/outhad/integrations/source/firecrawl/client.rb +151 -0
- data/lib/outhad/integrations/source/firecrawl/config/catalog.json +29 -0
- data/lib/outhad/integrations/source/firecrawl/config/meta.json +17 -0
- data/lib/outhad/integrations/source/firecrawl/config/spec.json +31 -0
- data/lib/outhad/integrations/source/firecrawl/icon.svg +4 -0
- data/lib/outhad/integrations/source/generic_open_ai/client.rb +118 -0
- data/lib/outhad/integrations/source/generic_open_ai/config/catalog.json +6 -0
- data/lib/outhad/integrations/source/generic_open_ai/config/meta.json +16 -0
- data/lib/outhad/integrations/source/generic_open_ai/config/spec.json +63 -0
- data/lib/outhad/integrations/source/generic_open_ai/icon.svg +6 -0
- data/lib/outhad/integrations/source/google_vertex_model/client.rb +83 -0
- data/lib/outhad/integrations/source/google_vertex_model/config/catalog.json +6 -0
- data/lib/outhad/integrations/source/google_vertex_model/config/meta.json +17 -0
- data/lib/outhad/integrations/source/google_vertex_model/config/spec.json +105 -0
- data/lib/outhad/integrations/source/google_vertex_model/icon.svg +2 -0
- data/lib/outhad/integrations/source/http_model/client.rb +108 -0
- data/lib/outhad/integrations/source/http_model/config/catalog.json +6 -0
- data/lib/outhad/integrations/source/http_model/config/meta.json +16 -0
- data/lib/outhad/integrations/source/http_model/config/spec.json +70 -0
- data/lib/outhad/integrations/source/http_model/icon.svg +9 -0
- data/lib/outhad/integrations/source/intuit_quick_books/client.rb +213 -0
- data/lib/outhad/integrations/source/intuit_quick_books/config/catalog.json +6 -0
- data/lib/outhad/integrations/source/intuit_quick_books/config/meta.json +17 -0
- data/lib/outhad/integrations/source/intuit_quick_books/config/spec.json +44 -0
- data/lib/outhad/integrations/source/intuit_quick_books/icon.svg +1 -0
- data/lib/outhad/integrations/source/maria_db/client.rb +92 -0
- data/lib/outhad/integrations/source/maria_db/config/meta.json +16 -0
- data/lib/outhad/integrations/source/maria_db/config/spec.json +48 -0
- data/lib/outhad/integrations/source/maria_db/icon.svg +15 -0
- data/lib/outhad/integrations/source/odoo/client.rb +106 -0
- data/lib/outhad/integrations/source/odoo/config/meta.json +15 -0
- data/lib/outhad/integrations/source/odoo/config/spec.json +39 -0
- data/lib/outhad/integrations/source/odoo/icon.svg +21 -0
- data/lib/outhad/integrations/source/open_ai/client.rb +118 -0
- data/lib/outhad/integrations/source/open_ai/config/catalog.json +6 -0
- data/lib/outhad/integrations/source/open_ai/config/meta.json +16 -0
- data/lib/outhad/integrations/source/open_ai/config/spec.json +56 -0
- data/lib/outhad/integrations/source/open_ai/icon.svg +1 -0
- data/lib/outhad/integrations/source/oracle_db/client.rb +127 -0
- data/lib/outhad/integrations/source/oracle_db/config/meta.json +16 -0
- data/lib/outhad/integrations/source/oracle_db/config/spec.json +47 -0
- data/lib/outhad/integrations/source/oracle_db/icon.svg +4 -0
- data/lib/outhad/integrations/source/pinecone_db/client.rb +73 -0
- data/lib/outhad/integrations/source/pinecone_db/config/catalog.json +6 -0
- data/lib/outhad/integrations/source/pinecone_db/config/meta.json +16 -0
- data/lib/outhad/integrations/source/pinecone_db/config/spec.json +34 -0
- data/lib/outhad/integrations/source/pinecone_db/icon.svg +2 -0
- data/lib/outhad/integrations/source/postgresql/client.rb +112 -0
- data/lib/outhad/integrations/source/postgresql/config/meta.json +16 -0
- data/lib/outhad/integrations/source/postgresql/config/spec.json +86 -0
- data/lib/outhad/integrations/source/postgresql/icon.svg +20 -0
- data/lib/outhad/integrations/source/qdrant/client.rb +86 -0
- data/lib/outhad/integrations/source/qdrant/config/catalog.json +6 -0
- data/lib/outhad/integrations/source/qdrant/config/meta.json +16 -0
- data/lib/outhad/integrations/source/qdrant/config/spec.json +29 -0
- data/lib/outhad/integrations/source/qdrant/icon.svg +1 -0
- data/lib/outhad/integrations/source/redshift/client.rb +109 -0
- data/lib/outhad/integrations/source/redshift/config/meta.json +16 -0
- data/lib/outhad/integrations/source/redshift/config/spec.json +71 -0
- data/lib/outhad/integrations/source/redshift/icon.svg +15 -0
- data/lib/outhad/integrations/source/salesforce_consumer_goods_cloud/client.rb +133 -0
- data/lib/outhad/integrations/source/salesforce_consumer_goods_cloud/config/catalog.json +6 -0
- data/lib/outhad/integrations/source/salesforce_consumer_goods_cloud/config/meta.json +18 -0
- data/lib/outhad/integrations/source/salesforce_consumer_goods_cloud/config/spec.json +53 -0
- data/lib/outhad/integrations/source/salesforce_consumer_goods_cloud/icon.svg +16 -0
- data/lib/outhad/integrations/source/salesforce_consumer_goods_cloud/schema_helper.rb +130 -0
- data/lib/outhad/integrations/source/sftp/client.rb +133 -0
- data/lib/outhad/integrations/source/sftp/config/meta.json +16 -0
- data/lib/outhad/integrations/source/sftp/config/spec.json +59 -0
- data/lib/outhad/integrations/source/sftp/icon.svg +1 -0
- data/lib/outhad/integrations/source/snowflake/client.rb +92 -0
- data/lib/outhad/integrations/source/snowflake/config/meta.json +16 -0
- data/lib/outhad/integrations/source/snowflake/config/spec.json +82 -0
- data/lib/outhad/integrations/source/snowflake/icon.svg +10 -0
- data/lib/outhad/integrations/source/watsonx_ai/client.rb +194 -0
- data/lib/outhad/integrations/source/watsonx_ai/config/catalog.json +6 -0
- data/lib/outhad/integrations/source/watsonx_ai/config/meta.json +16 -0
- data/lib/outhad/integrations/source/watsonx_ai/config/spec.json +74 -0
- data/lib/outhad/integrations/source/watsonx_ai/icon.svg +1 -0
- data/lib/outhad/integrations/source/watsonx_data/client.rb +146 -0
- data/lib/outhad/integrations/source/watsonx_data/config/meta.json +17 -0
- data/lib/outhad/integrations/source/watsonx_data/config/spec.json +72 -0
- data/lib/outhad/integrations/source/watsonx_data/icon.svg +1 -0
- data/lib/outhad/integrations.rb +129 -0
- data/outhad-integrations.gemspec +79 -0
- data/sig/outhad/integrations.rbs +6 -0
- metadata +866 -0
@@ -0,0 +1,16 @@
|
|
1
|
+
{
|
2
|
+
"data": {
|
3
|
+
"name": "Clickhouse",
|
4
|
+
"title": "ClickHouse",
|
5
|
+
"connector_type": "source",
|
6
|
+
"category": "Data Warehouse",
|
7
|
+
"sub_category": "Relational Database",
|
8
|
+
"documentation_url": "https://docs.squared.ai/guides/sources/data-sources/clickhouse",
|
9
|
+
"github_issue_label": "source-clickhouse",
|
10
|
+
"icon": "icon.svg",
|
11
|
+
"license": "MIT",
|
12
|
+
"release_stage": "alpha",
|
13
|
+
"support_level": "community",
|
14
|
+
"tags": ["language:ruby", "outhad"]
|
15
|
+
}
|
16
|
+
}
|
@@ -0,0 +1,42 @@
|
|
1
|
+
{
|
2
|
+
"documentation_url": "https://docs.squared.ai/guides/sources/data-sources/clickhouse",
|
3
|
+
"stream_type": "dynamic",
|
4
|
+
"connector_query_type": "raw_sql",
|
5
|
+
"connection_specification": {
|
6
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
7
|
+
"title": "ClickHouse",
|
8
|
+
"type": "object",
|
9
|
+
"required": ["url", "username", "password", "database"],
|
10
|
+
"properties": {
|
11
|
+
"url": {
|
12
|
+
"description": "The ClickHouse host url to connect.",
|
13
|
+
"examples": ["tu61szglca.us-west-2.aws.clickhouse.cloud"],
|
14
|
+
"type": "string",
|
15
|
+
"title": "Personal URL",
|
16
|
+
"order": 0
|
17
|
+
},
|
18
|
+
"username": {
|
19
|
+
"description": "The username for ClickHouse.",
|
20
|
+
"examples": ["Default"],
|
21
|
+
"type": "string",
|
22
|
+
"title": "Username",
|
23
|
+
"order": 1
|
24
|
+
},
|
25
|
+
"password": {
|
26
|
+
"description": "The password for ClickHouse.",
|
27
|
+
"examples": ["Default"],
|
28
|
+
"type": "string",
|
29
|
+
"outhad_secret": true,
|
30
|
+
"title": "Password",
|
31
|
+
"order": 2
|
32
|
+
},
|
33
|
+
"database": {
|
34
|
+
"description": "The ClickHouse database.",
|
35
|
+
"examples": ["default"],
|
36
|
+
"type": "string",
|
37
|
+
"title": "Database",
|
38
|
+
"order": 3
|
39
|
+
}
|
40
|
+
}
|
41
|
+
}
|
42
|
+
}
|
@@ -0,0 +1,25 @@
|
|
1
|
+
<svg height="100" width="100" version="1.1" id="Layer_1" xmlns:x="ns_extend;" xmlns:i="ns_ai;" xmlns:graph="ns_graphs;" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px" viewBox="0 0 50.6 50.6" style="enable-background:new 0 0 50.6 50.6;" xml:space="preserve">
|
2
|
+
<metadata>
|
3
|
+
<sfw xmlns="ns_sfw;">
|
4
|
+
<slices>
|
5
|
+
</slices>
|
6
|
+
<sliceSourceBounds bottomLeftOrigin="true" height="50.6" width="50.6" x="0" y="0">
|
7
|
+
</sliceSourceBounds>
|
8
|
+
</sfw>
|
9
|
+
</metadata>
|
10
|
+
<g>
|
11
|
+
<g>
|
12
|
+
<path d="M0.6,0H5c0.3,0,0.6,0.3,0.6,0.6V50c0,0.3-0.3,0.6-0.6,0.6H0.6C0.3,50.6,0,50.4,0,50V0.6C0,0.3,0.3,0,0.6,0z" fill="yellow">
|
13
|
+
</path>
|
14
|
+
<path d="M11.8,0h4.4c0.3,0,0.6,0.3,0.6,0.6V50c0,0.3-0.3,0.6-0.6,0.6h-4.4c-0.3,0-0.6-0.3-0.6-0.6V0.6C11.3,0.3,11.5,0,11.8,0z" fill="yellow">
|
15
|
+
</path>
|
16
|
+
<path d="M23.1,0h4.4c0.3,0,0.6,0.3,0.6,0.6V50c0,0.3-0.3,0.6-0.6,0.6h-4.4c-0.3,0-0.6-0.3-0.6-0.6V0.6C22.5,0.3,22.8,0,23.1,0z" fill="yellow">
|
17
|
+
</path>
|
18
|
+
<path d="M34.3,0h4.4c0.3,0,0.6,0.3,0.6,0.6V50c0,0.3-0.3,0.6-0.6,0.6h-4.4c-0.3,0-0.6-0.3-0.6-0.6V0.6C33.7,0.3,34,0,34.3,0z" fill="yellow">
|
19
|
+
</path>
|
20
|
+
<path d="M45.6,19.7H50c0.3,0,0.6,0.3,0.6,0.6v10.1c0,0.3-0.3,0.6-0.6,0.6h-4.4c-0.3,0-0.6-0.3-0.6-0.6V20.3
|
21
|
+
C45,20,45.3,19.7,45.6,19.7z" fill="yellow">
|
22
|
+
</path>
|
23
|
+
</g>
|
24
|
+
</g>
|
25
|
+
</svg>
|
@@ -0,0 +1,98 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Outhad::Integrations::Source
  module Databricks
    include Outhad::Integrations::Core

    # Source connector for Databricks. Connections are created through
    # Sequel's ODBC adapter from a driver connection string assembled out of
    # the connector configuration.
    class Client < SourceConnector
      # Reports whether a connection can be created with the given settings.
      #
      # Any StandardError raised while connecting is reported as a "failed"
      # status carrying the error message instead of being propagated.
      #
      # @param connection_config [Hash] connector settings; the keys read are
      #   :host, :port, :http_path and :access_token
      # @return [Object] the outhad message of a ConnectionStatus
      def check_connection(connection_config)
        connection_config = connection_config.with_indifferent_access
        db = create_connection(connection_config)
        ConnectionStatus.new(status: ConnectionStatusType["succeeded"]).to_outhad_message
      rescue StandardError => e
        ConnectionStatus.new(status: ConnectionStatusType["failed"], message: e.message).to_outhad_message
      ensure
        # The database object is only needed for the probe; release it.
        db&.disconnect
      end

      # Builds a catalog with one stream per table found in
      # system.information_schema.columns for the configured schema/catalog.
      #
      # @param connection_config [Hash] connector settings; additionally reads
      #   :schema and :catalog
      # @return [Object] the outhad message of the Catalog, or whatever
      #   handle_exception returns when a StandardError is raised
      def discover(connection_config)
        connection_config = connection_config.with_indifferent_access
        db = create_connection(connection_config)

        records = []
        db.fetch(discovery_query(connection_config)) do |row|
          records << row
        end
        Catalog.new(streams: create_streams(records)).to_outhad_message
      rescue StandardError => e
        handle_exception(e, {
                           context: "DATABRICKS:DISCOVER:EXCEPTION",
                           type: "error"
                         })
      ensure
        db&.disconnect
      end

      # Runs the model query (batched when a limit or offset is present) and
      # returns the rows as record messages.
      #
      # @param sync_config [Object] sync configuration; reads source,
      #   model.query, limit, offset, sync_id and sync_run_id
      # @return [Array, Object] record messages, or whatever handle_exception
      #   returns when a StandardError is raised
      def read(sync_config)
        connection_config = sync_config.source.connection_specification
        connection_config = connection_config.with_indifferent_access
        query = sync_config.model.query
        query = batched_query(query, sync_config.limit, sync_config.offset) unless sync_config.limit.nil? && sync_config.offset.nil?

        db = create_connection(connection_config)

        # All rows are materialised by #query before the connection is closed.
        query(db, query)
      rescue StandardError => e
        handle_exception(e, {
                           context: "DATABRICKS:READ:EXCEPTION",
                           type: "error",
                           sync_id: sync_config.sync_id,
                           sync_run_id: sync_config.sync_run_id
                         })
      ensure
        db&.disconnect
      end

      private

      # SQL used by #discover. It is assembled as a single line so that token
      # separation does not depend on how the source happens to be indented.
      #
      # NOTE(review): schema and catalog are interpolated verbatim; a value
      # containing a single quote will break (or alter) the statement.
      # Consider quoting/validating these values.
      def discovery_query(connection_config)
        "SELECT table_name, column_name, data_type, is_nullable " \
          "FROM system.information_schema.columns " \
          "WHERE table_schema = '#{connection_config[:schema]}' AND table_catalog = '#{connection_config[:catalog]}' " \
          "ORDER BY table_name, ordinal_position;"
      end

      # Fetches every row of +query+ and wraps each one in a RecordMessage
      # stamped with the current epoch second.
      def query(connection, query)
        records = []
        connection.fetch(query) do |row|
          records << RecordMessage.new(data: row, emitted_at: Time.now.to_i).to_outhad_message
        end
        records
      end

      # Creates the Sequel database object for the ODBC driver connection.
      def create_connection(connection_config)
        Sequel.odbc(drvconnect: generate_drvconnect(connection_config))
      end

      # Assembles the driver connection string, one attribute group per line.
      #
      # NOTE(review): AllowSelfSignedServerCert=1 and
      # CAIssuedCertNamesMismatch=1 presumably relax TLS certificate checks;
      # confirm against the driver documentation that this is intended.
      def generate_drvconnect(connection_config)
        <<~DRVCONNECT.chomp
          Driver=#{DATABRICKS_DRIVER_PATH};
          Host=#{connection_config[:host]};
          PORT=#{connection_config[:port]};
          SSL=1;
          HTTPPath=#{connection_config[:http_path]};
          PWD=#{connection_config[:access_token]};
          UID=token;
          ThriftTransport=2;AuthMech=3;
          AllowSelfSignedServerCert=1;
          CAIssuedCertNamesMismatch=1
        DRVCONNECT
      end

      # Turns the grouped column records into one "fetch" stream per table.
      def create_streams(records)
        group_by_table(records).map do |r|
          Outhad::Integrations::Protocol::Stream.new(name: r[:tablename], action: StreamAction["fetch"], json_schema: convert_to_json_schema(r[:columns]))
        end
      end

      # Groups column rows by :table_name into hashes of the form
      # { tablename:, columns: [{ column_name:, type:, optional: }] };
      # a column is optional when its :is_nullable value is "YES".
      def group_by_table(records)
        records.group_by { |entry| entry[:table_name] }.map do |table_name, columns|
          {
            tablename: table_name,
            columns: columns.map { |column| { column_name: column[:column_name], type: column[:data_type], optional: column[:is_nullable] == "YES" } }
          }
        end
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
{
|
2
|
+
"data": {
|
3
|
+
"name": "Databricks",
|
4
|
+
"title": "Databricks Data Warehouse",
|
5
|
+
"connector_type": "source",
|
6
|
+
"category": "Data Warehouse",
|
7
|
+
"sub_category": "Relational Database",
|
8
|
+
"documentation_url": "https://docs.squared.ai/guides/sources/data-sources/databricks",
|
9
|
+
"github_issue_label": "source-databricks",
|
10
|
+
"icon": "icon.svg",
|
11
|
+
"license": "MIT",
|
12
|
+
"release_stage": "alpha",
|
13
|
+
"support_level": "community",
|
14
|
+
"tags": ["language:ruby", "outhad"]
|
15
|
+
}
|
16
|
+
}
|
17
|
+
|
@@ -0,0 +1,56 @@
|
|
1
|
+
{
|
2
|
+
"documentation_url": "https://docs.squared.ai/guides/sources/data-sources/databricks",
|
3
|
+
"stream_type": "dynamic",
|
4
|
+
"connector_query_type": "raw_sql",
|
5
|
+
"connection_specification": {
|
6
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
7
|
+
"title": "Databricks Datawarehouse",
|
8
|
+
"type": "object",
|
9
|
+
"required": ["host", "port", "http_path", "catalog", "schema"],
|
10
|
+
"properties": {
|
11
|
+
"host": {
|
12
|
+
"title": "Server Hostname",
|
13
|
+
"description": "Server host name for the Databricks Cluster. It is different from the SQL Endpoint Cluster.",
|
14
|
+
"type": "string",
|
15
|
+
"examples": ["abc-12345678-wxyz.cloud.databricks.com"],
|
16
|
+
"order": 0
|
17
|
+
},
|
18
|
+
"port": {
|
19
|
+
"title": "Port",
|
20
|
+
"description": "",
|
21
|
+
"type": "string",
|
22
|
+
"default": "443",
|
23
|
+
"order": 1
|
24
|
+
},
|
25
|
+
"access_token": {
|
26
|
+
"title": "Personal Access Token",
|
27
|
+
"description": "",
|
28
|
+
"type": "string",
|
29
|
+
"outhad_secret": true,
|
30
|
+
"order": 2
|
31
|
+
},
|
32
|
+
"http_path": {
|
33
|
+
"title": "HTTP Path",
|
34
|
+
"description": "",
|
35
|
+
"examples": ["sql/protocolvx/o/1234567489/0000-1111111-abcd90"],
|
36
|
+
"type": "string",
|
37
|
+
"order": 3
|
38
|
+
},
|
39
|
+
"catalog": {
|
40
|
+
"description": "The name of the catalog",
|
41
|
+
"default": "hive_metastore",
|
42
|
+
"type": "string",
|
43
|
+
"title": "Databricks catalog",
|
44
|
+
"order": 4
|
45
|
+
},
|
46
|
+
"schema": {
|
47
|
+
"description": "The default schema tables are written.",
|
48
|
+
"default": "default",
|
49
|
+
"type": "string",
|
50
|
+
"title": "Database schema",
|
51
|
+
"order": 5
|
52
|
+
}
|
53
|
+
}
|
54
|
+
}
|
55
|
+
}
|
56
|
+
|
@@ -0,0 +1,19 @@
|
|
1
|
+
<svg version="1.1" id="Layer_1" xmlns:x="ns_extend;" xmlns:i="ns_ai;" xmlns:graph="ns_graphs;" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px" viewBox="0 0 40.1 42" style="enable-background:new 0 0 40.1 42;" xml:space="preserve">
|
2
|
+
<style type="text/css">
|
3
|
+
.st0{fill:#FF3621;}
|
4
|
+
</style>
|
5
|
+
<metadata>
|
6
|
+
<sfw xmlns="ns_sfw;">
|
7
|
+
<slices>
|
8
|
+
</slices>
|
9
|
+
<sliceSourceBounds bottomLeftOrigin="true" height="42" width="40.1" x="-69.1" y="-10.5">
|
10
|
+
</sliceSourceBounds>
|
11
|
+
</sfw>
|
12
|
+
</metadata>
|
13
|
+
<g>
|
14
|
+
<path class="st0" d="M40.1,31.1v-7.4l-0.8-0.5L20.1,33.7l-18.2-10l0-4.3l18.2,9.9l20.1-10.9v-7.3l-0.8-0.5L20.1,21.2L2.6,11.6
|
15
|
+
L20.1,2l14.1,7.7l1.1-0.6V8.3L20.1,0L0,10.9V12L20.1,23l18.2-10v4.4l-18.2,10L0.8,16.8L0,17.3v7.4l20.1,10.9l18.2-9.9v4.3l-18.2,10
|
16
|
+
L0.8,29.5L0,30v1.1L20.1,42L40.1,31.1z">
|
17
|
+
</path>
|
18
|
+
</g>
|
19
|
+
</svg>
|
@@ -0,0 +1,89 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Outhad::Integrations::Source
|
4
|
+
module DatabricksModel
|
5
|
+
include Outhad::Integrations::Core
|
6
|
+
class Client < SourceConnector
|
7
|
+
# Probes the health URL built from the connection settings.
#
# Returns success_status when success?(response) holds, failure_status(nil)
# otherwise, and a "failed" ConnectionStatus message carrying the error text
# when a StandardError is raised along the way.
def check_connection(connection_config)
  config = connection_config.with_indifferent_access
  health_url = build_url(DATABRICKS_HEALTH_URL, config)
  response = Outhad::Integrations::Core::HttpClient.request(
    health_url,
    HTTP_GET,
    headers: auth_headers(config[:token])
  )
  return success_status if success?(response)

  failure_status(nil)
rescue StandardError => e
  ConnectionStatus.new(status: ConnectionStatusType["failed"], message: e.message).to_outhad_message
end
|
23
|
+
|
24
|
+
# Builds the catalog from the JSON read at CATALOG_SPEC_PATH; the connection
# settings argument is accepted but not used.
#
# Returns the catalog's outhad message, or the result of handle_exception
# when a StandardError is raised.
def discover(_connection_config = nil)
  spec = read_json(CATALOG_SPEC_PATH)
  build_catalog(spec).to_outhad_message
rescue StandardError => e
  handle_exception(e, {
                     context: "DATABRICKS MODEL:DISCOVER:EXCEPTION",
                     type: "error"
                   })
end
|
34
|
+
|
35
|
+
# Runs the model described by the sync config and returns its output.
#
# The model query is expected to be a JSON string: per the original author's
# note, the server checks the ConnectorQueryType and, for "ai_ml", computes
# the payload and passes it as the query in the sync config; that payload is
# then sent to the AI/ML model. Errors are forwarded to handle_exception.
def read(sync_config)
  settings = sync_config.source.connection_specification.with_indifferent_access
  model_payload = JSON.parse(sync_config.model.query)
  run_model(settings, model_payload)
rescue StandardError => e
  handle_exception(e, { context: "DATABRICKS MODEL:READ:EXCEPTION", type: "error" })
end
|
49
|
+
|
50
|
+
private
|
51
|
+
|
52
|
+
# POSTs the payload to the URL built from DATABRICKS_SERVING_URL and hands
# the response to process_response. The request carries the headers produced
# by auth_headers for the configured token and the connection's :config
# value. Errors are forwarded to handle_exception.
def run_model(connection_config, payload)
  settings = connection_config.with_indifferent_access
  serving_url = build_url(DATABRICKS_SERVING_URL, settings)
  request_headers = auth_headers(settings[:token])

  model_response = send_request(
    url: serving_url,
    http_method: HTTP_POST,
    payload: payload,
    headers: request_headers,
    config: settings[:config]
  )
  process_response(model_response)
rescue StandardError => e
  handle_exception(e, context: "DATABRICKS MODEL:RUN_MODEL:EXCEPTION", type: "error")
end
|
69
|
+
|
70
|
+
# Converts the model's HTTP response into the connector's return value.
#
# A successful response whose body parses as JSON yields a one-element array
# holding a record message. An unsuccessful response, or a body that is not
# valid JSON, yields the result of create_log_message instead.
def process_response(response)
  unless success?(response)
    return create_log_message("DATABRICKS MODEL:RUN_MODEL", "error", "request failed: #{response.body}")
  end

  begin
    parsed_body = JSON.parse(response.body)
    record = RecordMessage.new(data: parsed_body, emitted_at: Time.now.to_i)
    [record.to_outhad_message]
  rescue JSON::ParserError
    create_log_message("DATABRICKS MODEL:RUN_MODEL", "error", "parsing failed: please send a valid payload")
  end
end
|
82
|
+
|
83
|
+
# Fills the databricks_host and endpoint_name references of a URL template
# with the :databricks_host and :endpoint values of the connection config.
def build_url(url, connection_config)
  host = connection_config[:databricks_host]
  endpoint = connection_config[:endpoint]
  format(url, databricks_host: host, endpoint_name: endpoint)
end
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
{
|
2
|
+
"data": {
|
3
|
+
"name": "DatabricksModel",
|
4
|
+
"title": "Databricks Model",
|
5
|
+
"connector_type": "source",
|
6
|
+
"category": "AI Model",
|
7
|
+
"sub_category": "AI_ML Service",
|
8
|
+
"documentation_url": "https://docs.squared.ai/guides/sources/data-sources/databricks-model",
|
9
|
+
"github_issue_label": "source-databricks-foundation",
|
10
|
+
"icon": "icon.svg",
|
11
|
+
"license": "MIT",
|
12
|
+
"release_stage": "alpha",
|
13
|
+
"support_level": "community",
|
14
|
+
"tags": ["language:ruby", "outhad"]
|
15
|
+
}
|
16
|
+
}
|
17
|
+
|
@@ -0,0 +1,63 @@
|
|
1
|
+
{
|
2
|
+
"documentation_url": "https://docs.squared.ai/guides/sources/data-sources/databricks-model",
|
3
|
+
"stream_type": "user_defined",
|
4
|
+
"connector_query_type": "ai_ml",
|
5
|
+
"connection_specification": {
|
6
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
7
|
+
"title": "Databricks Model",
|
8
|
+
"type": "object",
|
9
|
+
"required": ["databricks_host", "token", "endpoint", "request_format", "response_format"],
|
10
|
+
"properties": {
|
11
|
+
"databricks_host": {
|
12
|
+
"title": "Databricks Host URL",
|
13
|
+
"description": "Endpoint host URL for the Databricks serving endpoint.",
|
14
|
+
"type": "string",
|
15
|
+
"examples": ["app.databricks.com"],
|
16
|
+
"order": 0
|
17
|
+
},
|
18
|
+
"token": {
|
19
|
+
"title": "Databricks Token",
|
20
|
+
"description": "personal access token",
|
21
|
+
"type": "string",
|
22
|
+
"outhad_secret": true,
|
23
|
+
"order": 1
|
24
|
+
},
|
25
|
+
"endpoint": {
|
26
|
+
"title": "Endpoint name",
|
27
|
+
"description": "Endpoint name",
|
28
|
+
"examples": ["databricks-dbrx-instruct"],
|
29
|
+
"type": "string",
|
30
|
+
"order": 2
|
31
|
+
},
|
32
|
+
"config": {
|
33
|
+
"title": "",
|
34
|
+
"type": "object",
|
35
|
+
"properties": {
|
36
|
+
"timeout": {
|
37
|
+
"type": "string",
|
38
|
+
"default": "30",
|
39
|
+
"title": "HTTP Timeout",
|
40
|
+
"description": "The maximum time, in seconds, to wait for a response from the server before the request is canceled.",
|
41
|
+
"order": 0
|
42
|
+
}
|
43
|
+
},
|
44
|
+
"order": 3
|
45
|
+
},
|
46
|
+
"request_format": {
|
47
|
+
"title": "Request Format",
|
48
|
+
"description": "Sample Request Format",
|
49
|
+
"type": "string",
|
50
|
+
"x-request-format": true,
|
51
|
+
"order": 4
|
52
|
+
},
|
53
|
+
"response_format": {
|
54
|
+
"title": "Response Format",
|
55
|
+
"description": "Sample Response Format",
|
56
|
+
"type": "string",
|
57
|
+
"x-response-format": true,
|
58
|
+
"order": 5
|
59
|
+
}
|
60
|
+
}
|
61
|
+
}
|
62
|
+
}
|
63
|
+
|
@@ -0,0 +1,19 @@
|
|
1
|
+
<svg version="1.1" id="Layer_1" xmlns:x="ns_extend;" xmlns:i="ns_ai;" xmlns:graph="ns_graphs;" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px" viewBox="0 0 40.1 42" style="enable-background:new 0 0 40.1 42;" xml:space="preserve">
|
2
|
+
<style type="text/css">
|
3
|
+
.st0{fill:#FF3621;}
|
4
|
+
</style>
|
5
|
+
<metadata>
|
6
|
+
<sfw xmlns="ns_sfw;">
|
7
|
+
<slices>
|
8
|
+
</slices>
|
9
|
+
<sliceSourceBounds bottomLeftOrigin="true" height="42" width="40.1" x="-69.1" y="-10.5">
|
10
|
+
</sliceSourceBounds>
|
11
|
+
</sfw>
|
12
|
+
</metadata>
|
13
|
+
<g>
|
14
|
+
<path class="st0" d="M40.1,31.1v-7.4l-0.8-0.5L20.1,33.7l-18.2-10l0-4.3l18.2,9.9l20.1-10.9v-7.3l-0.8-0.5L20.1,21.2L2.6,11.6
|
15
|
+
L20.1,2l14.1,7.7l1.1-0.6V8.3L20.1,0L0,10.9V12L20.1,23l18.2-10v4.4l-18.2,10L0.8,16.8L0,17.3v7.4l20.1,10.9l18.2-9.9v4.3l-18.2,10
|
16
|
+
L0.8,29.5L0,30v1.1L20.1,42L40.1,31.1z">
|
17
|
+
</path>
|
18
|
+
</g>
|
19
|
+
</svg>
|
@@ -0,0 +1,151 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Outhad::Integrations::Source
|
4
|
+
module Firecrawl
|
5
|
+
include Outhad::Integrations::Core
|
6
|
+
class Client < SourceConnector
|
7
|
+
# Checks whether the configured Firecrawl credentials are usable.
#
# Stores the connection settings, then issues the crawl_activity request.
# Returns success_status when success? accepts that response and
# failure_status(nil) otherwise. A raised StandardError is passed to
# handle_exception and then reported through failure_status.
def check_connection(connection_config)
  create_connection(connection_config.with_indifferent_access)
  activity_response = crawl_activity
  return success_status if success?(activity_response)

  failure_status(nil)
rescue StandardError => e
  handle_exception(e, { context: "FIRECRAWL:CHECK_CONNECTION:EXCEPTION", type: "error" })
  failure_status(e)
end
|
20
|
+
|
21
|
+
# Loads the catalog JSON found at CATALOG_SPEC_PATH, builds a catalog from it
# and returns it as an outhad message. The connection config argument is
# accepted but not used. Errors are forwarded to handle_exception.
def discover(_connection_config = nil)
  build_catalog(read_json(CATALOG_SPEC_PATH)).to_outhad_message
rescue StandardError => e
  handle_exception(e, { context: "FIRECRAWL:DISCOVER:EXCEPTION", type: "error" })
end
|
28
|
+
|
29
|
+
# Stores the source connection settings and runs a crawl with no limit
# argument (query is called with nil for both the query and the limit).
# Errors are forwarded to handle_exception together with the sync identifiers.
def read(sync_config)
  settings = sync_config.source.connection_specification.with_indifferent_access
  crawl_endpoint = create_connection(settings)
  query(crawl_endpoint, nil, nil)
rescue StandardError => e
  error_meta = {
    context: "FIRECRAWL:READ:EXCEPTION",
    type: "error",
    sync_id: sync_config.sync_id,
    sync_run_id: sync_config.sync_run_id
  }
  handle_exception(e, error_meta)
end
|
42
|
+
|
43
|
+
private
|
44
|
+
|
45
|
+
# Stores the connection settings on the instance and returns the crawl URL.
#
# Sets @base_url and @api_key, and builds @config from the optional :config
# hash by decoding each value with parse_config_value. When no config is
# given, @config starts as an empty indifferent-access hash so that the
# :url default written here is also found by the string-key lookups done
# elsewhere in this class. @config[:url] defaults to the base URL.
#
# @param connection_config [Hash] settings with :base_url, :api_key and an optional :config hash
# @return [String] FIRECRAWL_CRAWL_URL
def create_connection(connection_config)
  @base_url = connection_config[:base_url]
  @api_key = connection_config[:api_key]
  @config = if connection_config[:config].present?
              connection_config[:config].transform_values { |value| parse_config_value(value) }
            else
              {}.with_indifferent_access
            end
  @config[:url] ||= connection_config[:base_url]
  FIRECRAWL_CRAWL_URL
end

# Decodes a single config value as JSON, falling back to the raw value.
# TypeError is rescued as well as JSON::ParserError because JSON.parse raises
# TypeError for nil and other non-String values, which must not abort setup.
def parse_config_value(value)
  JSON.parse(value)
rescue JSON::ParserError, TypeError
  value
end
|
60
|
+
|
61
|
+
# Starts a crawl and converts every entry of its result into a record message.
#
# When a limit is present the crawl is first narrowed (see narrow_crawl).
# The crawl is then submitted, the result URL resolved, and the result polled
# until it is available.
#
# @param url [String] endpoint the crawl request is POSTed to
# @param _query [Object] unused
# @param limit [Integer, nil] stored as the crawl limit when present
# @return [Array] one outhad message per entry of the result's "data"
def query(url, _query, limit = 1)
  narrow_crawl(limit) if limit.present?

  crawl_request = JSON.parse(execute_crawl(url).body)
  crawl_result = get_crawl_result(get_request_url(crawl_request))
  crawl_result["data"].map { |row| build_record(row) }
end

# Points the crawl at the first include path (when any), drops the
# includePaths setting and stores the limit in the crawl config.
def narrow_crawl(limit)
  if @config["includePaths"]&.any?
    first_path = @config["includePaths"].first
    @config["url"] = URI.join(@config["url"], first_path).to_s
  end
  @config.delete("includePaths")
  @config[:limit] = limit
end

# Builds the record message for one crawled row.
# Rows without metadata or markdown are tolerated: metadata and url become
# nil, and the hash is computed over an empty string instead of raising.
def build_record(row)
  metadata = row["metadata"]
  data = {
    metadata: (metadata.to_json if metadata),
    markdown: row["markdown"],
    url: (metadata["url"] if metadata),
    markdown_hash: Digest::MD5.hexdigest(row["markdown"].to_s) # Placeholder for webscraping extractor use
  }
  RecordMessage.new(data: data, emitted_at: Time.now.to_i).to_outhad_message
end
|
86
|
+
|
87
|
+
# POSTs the current crawl config to the given URL and returns the response.
# The config is round-tripped through JSON before being used as the payload.
def execute_crawl(url)
  crawl_payload = JSON.parse(@config.to_json)
  send_request(
    url: url,
    http_method: HTTP_POST,
    payload: crawl_payload,
    headers: auth_headers(@api_key),
    config: {}
  )
end
|
96
|
+
|
97
|
+
# Issues a GET request against FIRECRAWL_CRAWL_ACTIVE_URL with an empty
# payload and the headers produced by auth_headers; returns the response.
def crawl_activity
  request_headers = auth_headers(@api_key)
  send_request(url: FIRECRAWL_CRAWL_ACTIVE_URL, http_method: HTTP_GET, payload: {},
               headers: request_headers, config: {})
end
|
106
|
+
|
107
|
+
# Resolves the URL from which the crawl result can be fetched.
#
# When the crawl-start response already carries a "url", it is returned as is.
# Otherwise, if the response carries an "error", the method sleeps for the
# number of seconds captured from a "retry after <n>s" fragment (0 when the
# fragment is absent) and submits the crawl again. It then reads the last
# entry of "crawls" from the crawl_activity response and builds the result
# URL from that entry's id.
#
# @param request [Hash] parsed body of the crawl-start response
# @return [String] URL of the crawl result
# @raise [RuntimeError] when neither a result URL nor an active crawl id is available
def get_request_url(request)
  return request["url"] unless request["url"].blank?

  if request["error"].present?
    retry_after = request["error"][/retry after (\d+)s/, 1].to_i
    sleep(retry_after)
    execute_crawl(FIRECRAWL_CRAWL_URL)
  end

  # dig keeps a missing or empty "crawls" list from raising NoMethodError
  # before the descriptive error below can be reported.
  crawl_id = JSON.parse(crawl_activity.body).dig("crawls", -1, "id")
  raise "Missing crawl result URL and no active crawl ID available." unless crawl_id.present?

  build_url(FIRECRAWL_GET_CRAWL_URL, crawl_id.to_s)
end
|
127
|
+
|
128
|
+
# A crawl job needs time to finish. Polls the given URL until the reported
# "status" is something other than "scraping" and returns that parsed body.
# Between polls it sleeps for FIRECRAWL_REQUEST_RATE_LIMIT seconds.
# There is no upper bound on the number of polls.
def get_crawl_result(url)
  loop do
    poll_response = send_request(url: url, http_method: HTTP_GET, payload: {},
                                 headers: auth_headers(@api_key), config: {})
    crawl_state = JSON.parse(poll_response.body)
    return crawl_state unless crawl_state["status"] == "scraping"

    sleep(FIRECRAWL_REQUEST_RATE_LIMIT)
  end
end
|
145
|
+
|
146
|
+
# Fills the id reference of a URL template with the given id.
def build_url(url, id)
  substitutions = { id: id }
  format(url, substitutions)
end
|
149
|
+
end
|
150
|
+
end
|
151
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
{
|
2
|
+
"request_rate_limit": 15,
|
3
|
+
"request_rate_limit_unit": "minute",
|
4
|
+
"request_rate_concurrency": 5,
|
5
|
+
"streams": [
|
6
|
+
{
|
7
|
+
"name": "scrape",
|
8
|
+
"action": "fetch",
|
9
|
+
"json_schema": {
|
10
|
+
"type": "object",
|
11
|
+
"properties": {
|
12
|
+
"markdown": {
|
13
|
+
"type": "string"
|
14
|
+
},
|
15
|
+
"metadata": {
|
16
|
+
"type": "string"
|
17
|
+
},
|
18
|
+
"url": {
|
19
|
+
"type": "string"
|
20
|
+
},
|
21
|
+
"markdown_hash": {
|
22
|
+
"type": "string"
|
23
|
+
}
|
24
|
+
}
|
25
|
+
},
|
26
|
+
"supported_sync_modes": ["incremental"]
|
27
|
+
}
|
28
|
+
]
|
29
|
+
}
|