datagovmy-python-sdk 1.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datagovmy_python_sdk-1.0.1/.claude/agents/security-code-reviewer.md +123 -0
- datagovmy_python_sdk-1.0.1/.github/workflows/ci.yml +35 -0
- datagovmy_python_sdk-1.0.1/.github/workflows/release.yml +48 -0
- datagovmy_python_sdk-1.0.1/.gitignore +14 -0
- datagovmy_python_sdk-1.0.1/.python-version +1 -0
- datagovmy_python_sdk-1.0.1/.vscode/settings.json +12 -0
- datagovmy_python_sdk-1.0.1/CHANGELOG.md +15 -0
- datagovmy_python_sdk-1.0.1/CLAUDE.md +103 -0
- datagovmy_python_sdk-1.0.1/PKG-INFO +63 -0
- datagovmy_python_sdk-1.0.1/README.md +55 -0
- datagovmy_python_sdk-1.0.1/datagovmy/__init__.py +3 -0
- datagovmy_python_sdk-1.0.1/datagovmy/core/__init__.py +0 -0
- datagovmy_python_sdk-1.0.1/datagovmy/core/api.py +76 -0
- datagovmy_python_sdk-1.0.1/datagovmy/datagovmy.py +22 -0
- datagovmy_python_sdk-1.0.1/datagovmy/service/__init__.py +4 -0
- datagovmy_python_sdk-1.0.1/datagovmy/service/data_catalogue.py +23 -0
- datagovmy_python_sdk-1.0.1/datagovmy/service/environment.py +2 -0
- datagovmy_python_sdk-1.0.1/datagovmy/service/exceptions.py +6 -0
- datagovmy_python_sdk-1.0.1/datagovmy/service/open_dosm.py +23 -0
- datagovmy_python_sdk-1.0.1/docs/usage.md +222 -0
- datagovmy_python_sdk-1.0.1/example.py +14 -0
- datagovmy_python_sdk-1.0.1/pyproject.toml +71 -0
- datagovmy_python_sdk-1.0.1/tests/core/__init__.py +0 -0
- datagovmy_python_sdk-1.0.1/tests/core/test_api.py +119 -0
- datagovmy_python_sdk-1.0.1/tests/test_datagovmy.py +47 -0
- datagovmy_python_sdk-1.0.1/uv.lock +292 -0
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: security-code-reviewer
|
|
3
|
+
description: "Proactively use this agent when code has been written or modified and needs to be reviewed for security vulnerabilities, coding best practices, and potential issues. This includes after writing new functions, refactoring existing code, or implementing features that handle user input, authentication, data storage, or network communication.\\n\\nExamples:\\n\\n- User: \"Implement a login endpoint that accepts username and password\"\\n Assistant: \"Here is the login endpoint implementation:\"\\n <function call to write the code>\\n Since security-sensitive code was written (authentication), use the Agent tool to launch the security-code-reviewer agent to review the code for vulnerabilities.\\n Assistant: \"Now let me use the security-code-reviewer agent to review this code for security issues.\"\\n\\n- User: \"Add a function to query the database based on user input\"\\n Assistant: \"Here is the database query function:\"\\n <function call to write the code>\\n Since code handling user input and database queries was written, use the Agent tool to launch the security-code-reviewer agent to check for SQL injection and other vulnerabilities.\\n Assistant: \"Let me run the security-code-reviewer agent to ensure this is safe from injection attacks.\"\\n\\n- User: \"Can you review the changes I just made?\"\\n Assistant: \"I'll use the security-code-reviewer agent to review your recent changes.\"\\n Since the user explicitly requested a review, use the Agent tool to launch the security-code-reviewer agent."
|
|
4
|
+
tools: Bash, Glob, Grep, Read, WebFetch, WebSearch, Skill, TaskCreate, TaskGet, TaskUpdate, TaskList, EnterWorktree, ToolSearch
|
|
5
|
+
model: sonnet
|
|
6
|
+
color: red
|
|
7
|
+
memory: project
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
You are an elite security-focused code reviewer with deep expertise in application security, secure coding practices, and vulnerability assessment. You have extensive knowledge of OWASP Top 10, CWE classifications, and language-specific security pitfalls. You approach every review with the mindset of both a defensive developer and an offensive security researcher.
|
|
11
|
+
|
|
12
|
+
## Core Responsibilities
|
|
13
|
+
|
|
14
|
+
1. **Security Vulnerability Detection**: Identify vulnerabilities including but not limited to:
|
|
15
|
+
- Injection flaws (SQL, NoSQL, OS command, LDAP, XSS)
|
|
16
|
+
- Broken authentication and session management
|
|
17
|
+
- Sensitive data exposure (hardcoded secrets, insufficient encryption, logging PII)
|
|
18
|
+
- Insecure deserialization
|
|
19
|
+
- Broken access control and privilege escalation paths
|
|
20
|
+
- Server-side request forgery (SSRF)
|
|
21
|
+
- Path traversal and file inclusion
|
|
22
|
+
- Race conditions and TOCTOU bugs
|
|
23
|
+
- Unsafe use of cryptographic primitives
|
|
24
|
+
- Dependency vulnerabilities and supply chain risks
|
|
25
|
+
|
|
26
|
+
2. **Coding Best Practices**: Evaluate code for:
|
|
27
|
+
- Input validation and sanitization completeness
|
|
28
|
+
- Proper error handling (no information leakage in errors)
|
|
29
|
+
- Principle of least privilege adherence
|
|
30
|
+
- Defense in depth implementation
|
|
31
|
+
- Secure defaults
|
|
32
|
+
- DRY, SOLID, and clean code principles where they intersect with security
|
|
33
|
+
- Proper resource management (file handles, connections, memory)
|
|
34
|
+
- Type safety and null safety
|
|
35
|
+
|
|
36
|
+
3. **Code Quality with Security Implications**: Flag:
|
|
37
|
+
- Overly complex logic that obscures security-relevant behavior
|
|
38
|
+
- Missing or inadequate logging for security events
|
|
39
|
+
- Insufficient or misleading comments on security-critical sections
|
|
40
|
+
- Inconsistent error handling patterns
|
|
41
|
+
|
|
42
|
+
## Review Process
|
|
43
|
+
|
|
44
|
+
When reviewing code:
|
|
45
|
+
|
|
46
|
+
1. **Scope**: Focus on recently written or modified code. Read the relevant files and understand the context of changes.
|
|
47
|
+
2. **Analyze data flow**: Trace how user-controlled input flows through the code. Identify trust boundaries.
|
|
48
|
+
3. **Check each function/method**: Evaluate inputs, outputs, side effects, and error paths.
|
|
49
|
+
4. **Assess dependencies**: Note any use of external libraries and whether they are used securely.
|
|
50
|
+
5. **Consider the deployment context**: Think about how the code will run and what attack surface it exposes.
|
|
51
|
+
|
|
52
|
+
## Output Format
|
|
53
|
+
|
|
54
|
+
Structure your review as follows:
|
|
55
|
+
|
|
56
|
+
### Summary
|
|
57
|
+
A brief overall assessment (1-3 sentences) with a severity rating: ✅ Clean, ⚠️ Minor Issues, 🔶 Moderate Issues, 🔴 Critical Issues.
|
|
58
|
+
|
|
59
|
+
### Findings
|
|
60
|
+
For each issue found, provide:
|
|
61
|
+
- **Severity**: Critical / High / Medium / Low / Informational
|
|
62
|
+
- **Category**: (e.g., Injection, Auth, Data Exposure, Best Practice)
|
|
63
|
+
- **Location**: File and line reference
|
|
64
|
+
- **Description**: Clear explanation of the issue
|
|
65
|
+
- **Impact**: What could go wrong if exploited or left unaddressed
|
|
66
|
+
- **Recommendation**: Specific fix with code example when helpful
|
|
67
|
+
|
|
68
|
+
List findings in order of severity (critical first).
|
|
69
|
+
|
|
70
|
+
### Positive Observations
|
|
71
|
+
Briefly note any security best practices already well-implemented — this reinforces good habits.
|
|
72
|
+
|
|
73
|
+
## Guidelines
|
|
74
|
+
|
|
75
|
+
- Be precise and actionable. Avoid vague advice like "improve security." Always say exactly what to change and why.
|
|
76
|
+
- Distinguish between confirmed vulnerabilities and potential concerns that depend on context.
|
|
77
|
+
- If you lack sufficient context to determine severity, state your assumptions clearly.
|
|
78
|
+
- Do not overwhelm with noise — prioritize findings that have real security impact.
|
|
79
|
+
- When a pattern appears multiple times, note it once and indicate all affected locations.
|
|
80
|
+
- If the code looks secure, say so confidently rather than inventing issues.
|
|
81
|
+
|
|
82
|
+
**Update your agent memory** as you discover security patterns, recurring vulnerabilities, coding conventions, authentication/authorization patterns, and dependency usage in this codebase. This builds up institutional knowledge across conversations. Write concise notes about what you found and where.
|
|
83
|
+
|
|
84
|
+
Examples of what to record:
|
|
85
|
+
- Common input validation patterns used in the project
|
|
86
|
+
- Authentication and authorization mechanisms and their locations
|
|
87
|
+
- Cryptographic library usage and configuration patterns
|
|
88
|
+
- Known areas of technical debt with security implications
|
|
89
|
+
- Project-specific security conventions or middleware
|
|
90
|
+
|
|
91
|
+
# Persistent Agent Memory
|
|
92
|
+
|
|
93
|
+
You have a Persistent Agent Memory directory at `/Users/chriskhoo/Documents/Code/bbprojects/std-pipeline/.claude/agent-memory/security-code-reviewer/`. Its contents persist across conversations.
|
|
94
|
+
|
|
95
|
+
As you work, consult your memory files to build on previous experience. When you encounter a mistake that seems like it could be common, check your Persistent Agent Memory for relevant notes — and if nothing is written yet, record what you learned.
|
|
96
|
+
|
|
97
|
+
Guidelines:
|
|
98
|
+
- `MEMORY.md` is always loaded into your system prompt — lines after 200 will be truncated, so keep it concise
|
|
99
|
+
- Create separate topic files (e.g., `debugging.md`, `patterns.md`) for detailed notes and link to them from MEMORY.md
|
|
100
|
+
- Update or remove memories that turn out to be wrong or outdated
|
|
101
|
+
- Organize memory semantically by topic, not chronologically
|
|
102
|
+
- Use the Write and Edit tools to update your memory files
|
|
103
|
+
|
|
104
|
+
What to save:
|
|
105
|
+
- Stable patterns and conventions confirmed across multiple interactions
|
|
106
|
+
- Key architectural decisions, important file paths, and project structure
|
|
107
|
+
- User preferences for workflow, tools, and communication style
|
|
108
|
+
- Solutions to recurring problems and debugging insights
|
|
109
|
+
|
|
110
|
+
What NOT to save:
|
|
111
|
+
- Session-specific context (current task details, in-progress work, temporary state)
|
|
112
|
+
- Information that might be incomplete — verify against project docs before writing
|
|
113
|
+
- Anything that duplicates or contradicts existing CLAUDE.md instructions
|
|
114
|
+
- Speculative or unverified conclusions from reading a single file
|
|
115
|
+
|
|
116
|
+
Explicit user requests:
|
|
117
|
+
- When the user asks you to remember something across sessions (e.g., "always use bun", "never auto-commit"), save it — no need to wait for multiple interactions
|
|
118
|
+
- When the user asks to forget or stop remembering something, find and remove the relevant entries from your memory files
|
|
119
|
+
- Since this memory is project-scope and shared with your team via version control, tailor your memories to this project
|
|
120
|
+
|
|
121
|
+
## MEMORY.md
|
|
122
|
+
|
|
123
|
+
Your MEMORY.md is currently empty. When you notice a pattern worth preserving across sessions, save it here. Anything in MEMORY.md will be included in your system prompt next time.
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags: ["v*"]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
types: [opened, synchronize, reopened, ready_for_review]
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
lint:
|
|
12
|
+
name: PR Linting
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
steps:
|
|
15
|
+
- name: Run Python quality checks
|
|
16
|
+
uses: thekhoo/github-actions-shared/.github/actions/python-linting@main
|
|
17
|
+
with:
|
|
18
|
+
python-version: 3.13
|
|
19
|
+
directories: datagovmy/ tests/
|
|
20
|
+
use-ruff: true
|
|
21
|
+
use-pyrefly: true
|
|
22
|
+
|
|
23
|
+
test:
|
|
24
|
+
name: PR Unit Tests
|
|
25
|
+
runs-on: ubuntu-latest
|
|
26
|
+
strategy:
|
|
27
|
+
fail-fast: false
|
|
28
|
+
matrix:
|
|
29
|
+
python-version: ["3.13", "3.14"]
|
|
30
|
+
steps:
|
|
31
|
+
- name: Run Python quality checks
|
|
32
|
+
uses: thekhoo/github-actions-shared/.github/actions/python-pytest@main
|
|
33
|
+
with:
|
|
34
|
+
python-version: ${{ matrix.python-version }}
|
|
35
|
+
test-directories: tests/
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
ci:
|
|
9
|
+
name: CI
|
|
10
|
+
uses: thekhoo/github-actions-shared/.github/workflows/python-ci.yml@main
|
|
11
|
+
with:
|
|
12
|
+
linting-python-version: "3.13"
|
|
13
|
+
pytest-python-versions: '["3.13", "3.14"]'
|
|
14
|
+
lint-directories: "datagovmy/ tests/"
|
|
15
|
+
use-ruff: true
|
|
16
|
+
use-pyrefly: true
|
|
17
|
+
test-directories: "tests/"
|
|
18
|
+
|
|
19
|
+
release:
|
|
20
|
+
name: Semantic Release
|
|
21
|
+
needs: ci
|
|
22
|
+
runs-on: ubuntu-latest
|
|
23
|
+
environment: production
|
|
24
|
+
permissions:
|
|
25
|
+
contents: write
|
|
26
|
+
id-token: write
|
|
27
|
+
steps:
|
|
28
|
+
- uses: actions/checkout@v4
|
|
29
|
+
with:
|
|
30
|
+
fetch-depth: 0
|
|
31
|
+
|
|
32
|
+
- name: Run semantic release
|
|
33
|
+
id: release
|
|
34
|
+
uses: thekhoo/github-actions-shared/.github/actions/python-semantic-release@main
|
|
35
|
+
with:
|
|
36
|
+
github-token: ${{ secrets.GITHUB_TOKEN }}
|
|
37
|
+
python-version: "3.13"
|
|
38
|
+
|
|
39
|
+
- name: Publish to PyPI
|
|
40
|
+
if: steps.release.outputs.released == 'true'
|
|
41
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
42
|
+
|
|
43
|
+
- name: Publish GitHub Release
|
|
44
|
+
if: steps.release.outputs.released == 'true'
|
|
45
|
+
uses: thekhoo/github-actions-shared/.github/actions/github-publish-release@main
|
|
46
|
+
with:
|
|
47
|
+
github-token: ${{ secrets.GITHUB_TOKEN }}
|
|
48
|
+
tag: ${{ steps.release.outputs.tag }}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.13
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
{
|
|
2
|
+
"task.allowAutomaticTasks": "on",
|
|
3
|
+
"editor.formatOnSave": true,
|
|
4
|
+
"[python]": {
|
|
5
|
+
"editor.formatOnSave": true,
|
|
6
|
+
"editor.defaultFormatter": "charliermarsh.ruff",
|
|
7
|
+
"editor.codeActionsOnSave": {
|
|
8
|
+
"source.fixAll.ruff": "explicit",
|
|
9
|
+
"source.organizeImports.ruff": "explicit"
|
|
10
|
+
}
|
|
11
|
+
}
|
|
12
|
+
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# CHANGELOG
|
|
2
|
+
|
|
3
|
+
<!-- version list -->
|
|
4
|
+
|
|
5
|
+
## v1.0.1 (2026-03-29)
|
|
6
|
+
|
|
7
|
+
### Bug Fixes
|
|
8
|
+
|
|
9
|
+
- Add environment to release
|
|
10
|
+
([`fd8ec63`](https://github.com/thekhoo/datagovmy-python-sdk/commit/fd8ec638979021ab677a66b3241b7cc34d7a19eb))
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
## v1.0.0 (2026-03-29)
|
|
14
|
+
|
|
15
|
+
- Initial Release
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# CLAUDE.md — datagovmy-python-sdk
|
|
2
|
+
|
|
3
|
+
## Project Overview
|
|
4
|
+
|
|
5
|
+
Unofficial Python SDK for accessing open data from Malaysia's [data.gov.my](https://data.gov.my) platform via their [Open API](https://developer.data.gov.my/).
|
|
6
|
+
|
|
7
|
+
**Package name:** `datagovmy` (importable module in `datagovmy/`)
|
|
8
|
+
**Python:** >=3.13 (managed via `.python-version`)
|
|
9
|
+
**Build system:** Standard `pyproject.toml` (no build backend specified yet)
|
|
10
|
+
|
|
11
|
+
## API Reference (data.gov.my)
|
|
12
|
+
|
|
13
|
+
Base URL: `https://api.data.gov.my`
|
|
14
|
+
|
|
15
|
+
## Always do this
|
|
16
|
+
|
|
17
|
+
- When implementing a feature, always come up with a plan before making any code changes
|
|
18
|
+
- If requirements are not clear, always ask for clarity - never assume
|
|
19
|
+
- Always use TDD to implement new features
|
|
20
|
+
- After writing executable code, run the unit tests and linters and ensure all of them pass
|
|
21
|
+
- Always commit in small chunks. Unit tests and linters must pass before committing
|
|
22
|
+
- Be clear and concise with your code. If there are hidden implications, leave comments explaining why
|
|
23
|
+
- Make sure logs are written at different code checkpoints. Do not be too verbose
|
|
24
|
+
- Use existing utility functions instead of re-implementing. Helper functions should be reusable in an appropriately named module
|
|
25
|
+
|
|
26
|
+
## Never do this
|
|
27
|
+
|
|
28
|
+
- Change main code when there are no unit tests that capture the functionality. Add unit tests before making any changes
|
|
29
|
+
- Do not hardcode secrets or ARNs within the code. Any secrets should be taken from SSM
|
|
30
|
+
- Do not expose any tenanted information within the log messages
|
|
31
|
+
|
|
32
|
+
## Coding Conventions
|
|
33
|
+
|
|
34
|
+
### Environment
|
|
35
|
+
|
|
36
|
+
- All environment variable access should be done using helper functions in datagovmy/service/environment.py and not os.environ directly in the code.
|
|
37
|
+
|
|
38
|
+
### Endpoints
|
|
39
|
+
|
|
40
|
+
| Endpoint | Returns | Description |
|
|
41
|
+
| ---------------------------------------------- | -------- | --------------------------------------------- |
|
|
42
|
+
| `GET /data-catalogue?id=<id>` | JSON | National data catalogue datasets |
|
|
43
|
+
| `GET /opendosm?id=<id>` | JSON | Dept of Statistics (DOSM) datasets |
|
|
44
|
+
| `GET /weather/forecast` | JSON | 7-day weather forecast |
|
|
45
|
+
| `GET /weather/warning` | JSON | Weather warnings |
|
|
46
|
+
| `GET /weather/warning/earthquake` | JSON | Earthquake warnings |
|
|
47
|
+
| `GET /gtfs-static/<agency>` | ZIP | GTFS static feeds (ktmb, prasarana, mybas-\*) |
|
|
48
|
+
| `GET /gtfs-realtime/vehicle-position/<agency>` | Protobuf | Real-time vehicle positions |
|
|
49
|
+
|
|
50
|
+
### Authentication
|
|
51
|
+
|
|
52
|
+
- **Optional.** API works without a token (4 req/min).
|
|
53
|
+
- With token (10 req/min): `Authorization: Token <TOKEN>`
|
|
54
|
+
|
|
55
|
+
### Query Parameters (JSON endpoints)
|
|
56
|
+
|
|
57
|
+
| Param | Usage |
|
|
58
|
+
| ----------------------------------- | ---------------------------------------------------------- |
|
|
59
|
+
| `id` | Dataset identifier (required for data-catalogue/opendosm) |
|
|
60
|
+
| `limit` | Max records to return |
|
|
61
|
+
| `filter` / `ifilter` | Exact match (case-sensitive / insensitive): `value@column` |
|
|
62
|
+
| `contains` / `icontains` | Partial match: `value@column` |
|
|
63
|
+
| `range` | Numeric range: `column[begin:end]` |
|
|
64
|
+
| `sort` | Sort: `column,-column2` (dash = descending) |
|
|
65
|
+
| `include` / `exclude` | Select/omit columns |
|
|
66
|
+
| `date_start` / `date_end` | Date filter (`YYYY-MM-DD`) |
|
|
67
|
+
| `timestamp_start` / `timestamp_end` | Timestamp filter (`YYYY-MM-DD HH:MM:SS`) |
|
|
68
|
+
| `meta=true` | Include metadata wrapper in response |
|
|
69
|
+
|
|
70
|
+
Multiple filters: comma-separated. Nested fields: double underscores (e.g. `location__location_name`).
|
|
71
|
+
|
|
72
|
+
### Response Format
|
|
73
|
+
|
|
74
|
+
- Success: JSON array of records (or `{"meta": {}, "data": []}` with `meta=true`)
|
|
75
|
+
- Error: `{"status": <int>, "errors": [...]}`
|
|
76
|
+
- Status codes: 200, 400, 404, 429, 500
|
|
77
|
+
|
|
78
|
+
### Rate Limits
|
|
79
|
+
|
|
80
|
+
- Without token: **4 requests/minute**
|
|
81
|
+
- With token: **10 requests/minute**
|
|
82
|
+
- Exceeding returns HTTP 429
|
|
83
|
+
|
|
84
|
+
## Development Guidelines
|
|
85
|
+
|
|
86
|
+
- All endpoints are **read-only GET** requests — the SDK only needs HTTP GET support.
|
|
87
|
+
- Use `httpx` or `requests` as the HTTP client (add to `pyproject.toml` dependencies).
|
|
88
|
+
- The SDK should support both sync and async usage patterns.
|
|
89
|
+
- Handle rate limiting gracefully (retry with backoff on 429).
|
|
90
|
+
- Dataset IDs can be discovered at https://data.gov.my/data-catalogue or https://open.dosm.gov.my/data-catalogue.
|
|
91
|
+
|
|
92
|
+
## Commands
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
# Run the project
|
|
96
|
+
uv run main.py
|
|
97
|
+
|
|
98
|
+
# Run tests (once added)
|
|
99
|
+
uv run pytest
|
|
100
|
+
|
|
101
|
+
# Install dependencies
|
|
102
|
+
uv sync
|
|
103
|
+
```
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: datagovmy-python-sdk
|
|
3
|
+
Version: 1.0.1
|
|
4
|
+
Summary: Unofficial Python SDK for Malaysia's data.gov.my Open API
|
|
5
|
+
Requires-Python: >=3.13
|
|
6
|
+
Requires-Dist: requests
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
|
|
9
|
+
# Data.gov.my Python SDK
|
|
10
|
+
|
|
11
|
+
Unofficial SDK for accessing open source data on https://data.gov.my.
|
|
12
|
+
|
|
13
|
+
## Installation
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
pip install datagovmy-python-sdk
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Quick Start
|
|
20
|
+
|
|
21
|
+
```python
|
|
22
|
+
from datagovmy import DataGovMyClient
|
|
23
|
+
|
|
24
|
+
client = DataGovMyClient()
|
|
25
|
+
|
|
26
|
+
# Fetch a dataset from the national data catalogue
|
|
27
|
+
data = client.data_catalogue.get_dataset_as_json(id="population_malaysia")
|
|
28
|
+
|
|
29
|
+
# Fetch with filters
|
|
30
|
+
data = client.data_catalogue.get_dataset_as_json(
|
|
31
|
+
id="population_malaysia",
|
|
32
|
+
filter="Selangor@location",
|
|
33
|
+
sort="-year",
|
|
34
|
+
limit=10
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
# Fetch DOSM data
|
|
38
|
+
data = client.opendosm.get_dataset_as_json(id="cpi_2d_category")
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Documentation
|
|
42
|
+
|
|
43
|
+
For detailed usage examples including all supported query filters (filtering, sorting, pagination, date ranges, column selection, and more), see the [Usage Guide](docs/usage.md).
|
|
44
|
+
|
|
45
|
+
## API Reference
|
|
46
|
+
|
|
47
|
+
This SDK wraps the [data.gov.my Open API](https://developer.data.gov.my/). Dataset IDs can be discovered at:
|
|
48
|
+
|
|
49
|
+
- [data.gov.my Data Catalogue](https://data.gov.my/data-catalogue)
|
|
50
|
+
- [OpenDOSM Data Catalogue](https://open.dosm.gov.my/data-catalogue)
|
|
51
|
+
|
|
52
|
+
## Rate Limits
|
|
53
|
+
|
|
54
|
+
- Without API key: **4 requests/minute**
|
|
55
|
+
- With API key: **10 requests/minute**
|
|
56
|
+
|
|
57
|
+
```python
|
|
58
|
+
client = DataGovMyClient(api_key="your-api-key")
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## License
|
|
62
|
+
|
|
63
|
+
MIT
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# Data.gov.my Python SDK
|
|
2
|
+
|
|
3
|
+
Unofficial SDK for accessing open source data on https://data.gov.my.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install datagovmy-python-sdk
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Quick Start
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
from datagovmy import DataGovMyClient
|
|
15
|
+
|
|
16
|
+
client = DataGovMyClient()
|
|
17
|
+
|
|
18
|
+
# Fetch a dataset from the national data catalogue
|
|
19
|
+
data = client.data_catalogue.get_dataset_as_json(id="population_malaysia")
|
|
20
|
+
|
|
21
|
+
# Fetch with filters
|
|
22
|
+
data = client.data_catalogue.get_dataset_as_json(
|
|
23
|
+
id="population_malaysia",
|
|
24
|
+
filter="Selangor@location",
|
|
25
|
+
sort="-year",
|
|
26
|
+
limit=10
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
# Fetch DOSM data
|
|
30
|
+
data = client.opendosm.get_dataset_as_json(id="cpi_2d_category")
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Documentation
|
|
34
|
+
|
|
35
|
+
For detailed usage examples including all supported query filters (filtering, sorting, pagination, date ranges, column selection, and more), see the [Usage Guide](docs/usage.md).
|
|
36
|
+
|
|
37
|
+
## API Reference
|
|
38
|
+
|
|
39
|
+
This SDK wraps the [data.gov.my Open API](https://developer.data.gov.my/). Dataset IDs can be discovered at:
|
|
40
|
+
|
|
41
|
+
- [data.gov.my Data Catalogue](https://data.gov.my/data-catalogue)
|
|
42
|
+
- [OpenDOSM Data Catalogue](https://open.dosm.gov.my/data-catalogue)
|
|
43
|
+
|
|
44
|
+
## Rate Limits
|
|
45
|
+
|
|
46
|
+
- Without API key: **4 requests/minute**
|
|
47
|
+
- With API key: **10 requests/minute**
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
client = DataGovMyClient(api_key="your-api-key")
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## License
|
|
54
|
+
|
|
55
|
+
MIT
|
|
File without changes
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Any, Optional, cast
|
|
3
|
+
|
|
4
|
+
import requests
|
|
5
|
+
from requests.adapters import HTTPAdapter
|
|
6
|
+
from urllib3.util.retry import Retry
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger(__name__)


class BaseAPIClient:
    """Generic API client with configurable retry support.

    Wraps a persistent ``requests.Session`` so connections are pooled and
    default headers / retry behaviour apply to every request.

    Args:
        base_url: Base URL for all requests (e.g. "https://api.example.com").
        headers: Default headers sent with every request.
        retry_strategy: A ``urllib3.util.retry.Retry`` instance that controls
            which status codes trigger retries, how many times, and the backoff.
    """

    def __init__(
        self,
        base_url: str,
        headers: Optional[dict[str, str]] = None,
        retry_strategy: Optional[Retry] = None,
    ):
        self.base_url = base_url
        self.session = requests.Session()
        if headers:
            self.session.headers.update(headers)
        if retry_strategy:
            # cast: HTTPAdapter's type stubs declare max_retries more narrowly
            # than the runtime accepts; a Retry instance is valid here.
            adapter = HTTPAdapter(max_retries=cast(Any, retry_strategy))
            self.session.mount("http://", adapter)
            self.session.mount("https://", adapter)

    def close(self) -> None:
        """Release the session's pooled connections.

        The session is otherwise never closed, which leaks sockets in
        long-lived processes that create many clients.
        """
        self.session.close()

    def __enter__(self) -> "BaseAPIClient":
        return self

    def __exit__(self, exc_type: Any, exc: Any, tb: Any) -> None:
        # Close the session even if the caller's block raised.
        self.close()

    def _build_url(self, path: str) -> str:
        # Plain concatenation: callers are expected to pass paths that start
        # with "/" (e.g. "/data-catalogue").
        return f"{self.base_url}{path}"

    def _request(self, method: str, path: str, **kwargs: Any) -> requests.Response:
        """Send a request and raise ``requests.HTTPError`` on 4xx/5xx responses."""
        url = self._build_url(path)
        logger.debug("Requesting %s %s", method, url)
        response = self.session.request(method, url, **kwargs)
        response.raise_for_status()
        return response

    def get(
        self,
        path: str,
        params: Optional[dict[str, Any]] = None,
        headers: Optional[dict[str, str]] = None,
    ) -> requests.Response:
        """GET ``path`` with optional query parameters and per-call headers."""
        return self._request("GET", path, params=params, headers=headers)

    def post(
        self,
        path: str,
        json: Optional[Any] = None,
        headers: Optional[dict[str, str]] = None,
    ) -> requests.Response:
        """POST ``path`` with an optional JSON body."""
        return self._request("POST", path, json=json, headers=headers)

    def put(
        self,
        path: str,
        json: Optional[Any] = None,
        headers: Optional[dict[str, str]] = None,
    ) -> requests.Response:
        """PUT ``path`` with an optional JSON body."""
        return self._request("PUT", path, json=json, headers=headers)

    def delete(
        self,
        path: str,
        params: Optional[dict[str, Any]] = None,
        headers: Optional[dict[str, str]] = None,
    ) -> requests.Response:
        """DELETE ``path`` with optional query parameters."""
        return self._request("DELETE", path, params=params, headers=headers)
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from datagovmy.service import DataCatalogueClient, OpenDOSMClient
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class DataGovMyClient:
    """Top-level facade for the data.gov.my SDK.

    Holds one optional API key and hands out per-endpoint service clients.
    Service clients are constructed lazily on first access, so creating this
    facade does no network or setup work up front.
    """

    def __init__(self, api_key: Optional[str] = None):
        self.api_key = api_key
        # Built on first property access, then cached for the client's lifetime.
        self._catalogue_client: Optional[DataCatalogueClient] = None
        self._dosm_client: Optional[OpenDOSMClient] = None

    @property
    def data_catalogue(self) -> DataCatalogueClient:
        """Lazily-created client for the national data-catalogue endpoint."""
        if self._catalogue_client is None:
            self._catalogue_client = DataCatalogueClient(api_key=self.api_key)
        return self._catalogue_client

    @property
    def opendosm(self) -> OpenDOSMClient:
        """Lazily-created client for the OpenDOSM endpoint."""
        if self._dosm_client is None:
            self._dosm_client = OpenDOSMClient(api_key=self.api_key)
        return self._dosm_client
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from datagovmy.core.api import BaseAPIClient
|
|
4
|
+
from datagovmy.service.environment import get_base_url
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class DataCatalogueClient(BaseAPIClient):
    """Client for data.gov.my's national data-catalogue endpoint."""

    ENDPOINT = "/data-catalogue"

    def __init__(self, api_key: Optional[str] = None):
        # Sending the token raises the rate limit (10 req/min vs 4 req/min).
        # Previously the key was stored but never attached to requests.
        headers = {"Authorization": f"Token {api_key}"} if api_key else None
        super().__init__(base_url=get_base_url(), headers=headers)
        self.api_key = api_key

    def get_dataset_as_json(self, id: str, **kwargs):
        """Fetch the dataset identified by ``id`` and return its parsed JSON.

        Args:
            id: Dataset identifier from the data.gov.my catalogue.
            **kwargs: Additional query parameters for filtering
                (reference: https://developer.data.gov.my/request-query).
        """
        # Pass parameters via `params` so requests URL-encodes them; joining
        # raw values into the path broke on values containing '&', '#', spaces.
        params = {"id": id, **kwargs}
        return self.get(self.ENDPOINT, params=params).json()
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from datagovmy.core.api import BaseAPIClient
|
|
4
|
+
from datagovmy.service.environment import get_base_url
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class OpenDOSMClient(BaseAPIClient):
    """Client for the Department of Statistics Malaysia (OpenDOSM) endpoint."""

    ENDPOINT = "/opendosm"

    def __init__(self, api_key: Optional[str] = None):
        # Sending the token raises the rate limit (10 req/min vs 4 req/min).
        # Previously the key was stored but never attached to requests.
        headers = {"Authorization": f"Token {api_key}"} if api_key else None
        super().__init__(base_url=get_base_url(), headers=headers)
        self.api_key = api_key

    def get_dataset_as_json(self, id: str, **kwargs):
        """Fetch the OpenDOSM dataset identified by ``id`` as parsed JSON.

        Args:
            id: Dataset identifier from the OpenDOSM catalogue.
            **kwargs: Additional query parameters for filtering
                (reference: https://developer.data.gov.my/request-query).
        """
        # Pass parameters via `params` so requests URL-encodes them; joining
        # raw values into the path broke on values containing '&', '#', spaces.
        params = {"id": id, **kwargs}
        return self.get(self.ENDPOINT, params=params).json()
|